import os
import pandas as pd
import csv
import numpy as np
import math
from matplotlib.lines import Line2D
import matplotlib as mpl
from matplotlib.patches import Patch
import matplotlib.pyplot as plt
import random
from scipy.spatial import distance
from scipy import stats
from sklearn import manifold
from sklearn.decomposition import PCA
import statsmodels.stats.multitest as sm

Summary

This file includes the results of a microbiome analysis performed on samples taken from four individuals that were originally used to determine the “Impact of DNA source on genetic variant detection from human whole-genome sequencing data”.

This included blood, saliva and buccal samples taken from four individuals (blood samples were taken at a different time than saliva and buccal samples). Additionally, a methylation-based enrichment for eukaryotic DNA was performed on the saliva and buccal samples.

Basic processing steps

Obtaining reads

Fastq.gz files were downloaded from the ENA database, project accession number PRJNA523344

Kneaddata

Kneaddata was used for quality control and removal of human sequences. This included:
- Trimmomatic 0.39: “SLIDINGWINDOW:4:20 MINLEN:50”
- Bowtie2 with the GRCh38_PhiX database (to remove human and PhiX reads): “–fast –dovetail”

parallel -j 2 --link 'kneaddata -i {1} -i {2} -o kneaddata_out/ \
-db /home/shared/bowtiedb/GRCh38_PhiX --trimmomatic /home/robyn/Trimmomatic-0.39/ \
-t 40 --trimmomatic-options "SLIDINGWINDOW:4:20 MINLEN:50" \
--bowtie2-options "--fast --dovetail" --remove-intermediate-output' \
 ::: raw_data/*_1.fastq.gz ::: raw_data/*_2.fastq.gz
 
 mkdir kneaddata_out/contam_seq
mv kneaddata_out/*_contam*.fastq kneaddata_out/contam_seq
kneaddata_read_count_table --input kneaddata_out --output kneaddata_read_counts.txt
concat_paired_end.pl -p 4 --no_R_match -o cat_reads kneaddata_out/*_paired_*.fastq 

Reads kept after Kneaddata

#3 
#set up colors function (to get up to 120 colors, but with up to 40 unique colors)
def get_cols(num):
    colormap_20, colormap_40b, colormap_40c = mpl.cm.get_cmap('tab20', 256), mpl.cm.get_cmap('tab20b', 256), mpl.cm.get_cmap('tab20c', 256)
    norm, norm2 = mpl.colors.Normalize(vmin=0, vmax=19), mpl.colors.Normalize(vmin=20, vmax=39)
    m1, m2, m3 = mpl.cm.ScalarMappable(norm=norm, cmap=colormap_20), mpl.cm.ScalarMappable(norm=norm, cmap=colormap_40b), mpl.cm.ScalarMappable(norm=norm2, cmap=colormap_40c)
    colors_20 = [m1.to_rgba(a) for a in range(20)]
    colors_40 = [m2.to_rgba(a) for a in range(20)]+[m3.to_rgba(a) for a in range(20,40)]
    if num < 21: return colors_20
    elif num < 41: return colors_40
    else: return colors_40+colors_40+colors_40
#and colors and shapes for different participants and body sites
colors_dict, shapes_dict = {'Blood':'#900C3F', 'Saliva':'#016F85', 'Buccal':'#ff8300', 'Saliva_euk':'#02aed1', 'Buccal_euk':'#FFC300'}, {'Huref':'o', 'PGPC-0002':'^', 'PGPC-0005':'*', 'PGPC-0006':'s', 'PGPC-0050':'p'}
#4
#get numbers of reads for different steps
reads = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/read_counts.txt', sep='\t', index_col=3, header=0)
participant_dict, site_dict, full_name_dict = {}, {}, {}
samples = list(reads.index.values)
for s in samples:
    participant_dict[s] = reads.loc[s, 'Participant']
    site_dict[s] = reads.loc[s, 'Body site']
    full_name_dict[s] = reads.loc[s, 'Participant']+' '+reads.loc[s, 'Body site']
total_reads = pd.DataFrame(reads.loc[:, 'cat_reads'])
sample_names = [participant_dict[name]+' '+site_dict[name] for name in samples]
colors = [colors_dict[s] for s in list(reads.loc[:, 'Body site'].values)]
shapes = [shapes_dict[s] for s in list(reads.loc[:, 'Participant'].values)]

Percentage reads remaining

#5
plt.figure(figsize=(10, 5))
ax1, ax2 = plt.subplot(121), plt.subplot(122)
plt.sca(ax1)
plt.bar(list(reads.index.values), reads.loc[:, 'Percentage'].values, color=colors, edgecolor='k')
plt.xticks(list(reads.index.values), sample_names, rotation=90)
plt.ylabel('Reads kept (%)')
plt.xlim([-0.5,20.5])
plt.sca(ax2)
plt.bar(list(reads.index.values), reads.loc[:, 'Percentage'].values, color=colors, edgecolor='k')
plt.semilogy()
plt.xticks(list(reads.index.values), sample_names, rotation=90)
plt.ylabel('Log reads kept (%)')
handles = [Patch(facecolor=colors_dict[color], edgecolor='k', label=color) for color in colors_dict]
ax2.legend(handles=handles, bbox_to_anchor=(1.4,1.05))
plt.xlim([-0.5,20.5])
plt.tight_layout()
plt.show()

Number of reads remaining

#6
plt.figure(figsize=(10, 5))
ax1, ax2 = plt.subplot(121), plt.subplot(122)
plt.sca(ax1)
plt.bar(list(reads.index.values), reads.loc[:, 'cat_reads'].values, color=colors, edgecolor='k')
plt.xticks(list(reads.index.values), sample_names, rotation=90)
plt.xlim([-0.5,20.5])
plt.ylabel('Reads remaining')

plt.sca(ax2)
plt.bar(list(reads.index.values), reads.loc[:, 'cat_reads'].values, color=colors, edgecolor='k')
plt.semilogy()
plt.xticks(list(reads.index.values), sample_names, rotation=90)
plt.ylabel('Log reads remaining')
handles = [Patch(facecolor=colors_dict[color], edgecolor='k', label=color) for color in colors_dict]
ax2.legend(handles=handles, bbox_to_anchor=(1.4,1.05))
plt.xlim([-0.5,20.5])
plt.tight_layout()
plt.show()

Table of reads remaining

#7
reads_remain = reads.loc[:, ['Percentage', 'cat_reads']].rename(index=full_name_dict, columns = {'cat_reads':'Number'})
#8
py$reads_remain %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "350px", height = "400px")
Percentage Number
Huref Blood 0.2319067 1087311
PGPC-0002 Blood 0.5748318 3321664
PGPC-0005 Blood 0.5149286 1903482
PGPC-0006 Blood 0.7146067 3593441
PGPC-0050 Blood 0.6870164 2940390
PGPC-0002 Saliva 3.9289510 16561343
PGPC-0005 Saliva 55.6458826 244058333
PGPC-0006 Saliva 13.1608121 60497393
PGPC-0050 Saliva 56.3700720 257049004
PGPC-0002 Buccal 2.8112226 11462781
PGPC-0005 Buccal 2.8202504 12751614
PGPC-0006 Buccal 5.1521560 22434503
PGPC-0050 Buccal 2.4297107 10667011
PGPC-0002 Saliva_euk 1.2333408 5345888
PGPC-0005 Saliva_euk 8.3104464 38138382
PGPC-0006 Saliva_euk 1.9067564 8786635
PGPC-0050 Saliva_euk 5.6187272 25037552
PGPC-0002 Buccal_euk 0.9385856 4380036
PGPC-0005 Buccal_euk 1.2119820 5584415
PGPC-0006 Buccal_euk 1.5567291 7232280
PGPC-0050 Buccal_euk 1.3836141 6382288

Taxonomic profiling

The taxonomy has been profiled using:
1. HUMAnN
- HUMAnN2 and MetaPhlAn2
- HUMAnN3 and MetaPhlAn3
2. Kraken2 with Bracken
- GTDB (no confidence parameter set) - using the database constructed using Struo, release 89
- GTDB (confidence = 0.1)
- Minikraken v1 (no human genome, no confidence parameter set)
- Minikraken v1 (no human genome, confidence = 0.1)
- Minikraken v2 (with human genome, no confidence parameter set)
- Minikraken v2 (with human genome, confidence = 0.1)
- RefSeq Complete v93 (no confidence parameter set)
- RefSeq Complete v93 (confidence = 0.1)

Commands run:

humann2_databases --download chocophlan full humann_database/
humann2_databases --download uniref uniref90_diamond humann_database/

# Do this for each of uniref_90 and uniref_50

parallel -j 4 'humann2 --threads 10 --input {} --output humann2_out_90/{/.} --protein-database humann_database/uniref_90/' ::: cat_reads/*fastq

mkdir humann2_log_90
python
import os
samples = os.listdir('humann2_out_90')
for sample in samples:
    os.system('cp humann2_out_90/'+sample+'/'+sample+'_humann2_temp/'+sample+'.log humann2_log_90/')
    
quit()

mkdir humann2_final_out_90

humann2_join_tables -s --input humann2_out_90/ --file_name pathabundance --output humann2_final_out_90/humann2_pathabundance.tsv
humann2_join_tables -s --input humann2_out_90/ --file_name pathcoverage --output humann2_final_out_90/humann2_pathcoverage.tsv
humann2_join_tables -s --input humann2_out_90/ --file_name genefamilies --output humann2_final_out_90/humann2_genefamilies.tsv

mkdir humann2_final_out_90/bugs_lists/

python

import os
samples = os.listdir('humann2_out_90')
for sample in samples:
    os.system('cp humann2_out_90/'+sample+'/'+sample+'_humann2_temp/'+sample+'_metaphlan_bugs_list.tsv humann2_final_out_90/bugs_lists/')

quit()

merge_metaphlan_tables.py humann2_final_out_90/bugs_lists/*tsv > humann2_final_out_90/metaphlan_merged.tsv
rm -r humann2_final_out_90/bugs_lists

cd humann2_final_out_90/
humann2_renorm_table --input humann2_pathabundance.tsv --units relab --output humann2_pathabundance_relab.tsv
humann2_split_stratified_table --input humann2_pathabundance_relab.tsv --output ./

humann2_renorm_table --input humann2_genefamilies.tsv --units relab --output humann2_genefamilies_relab.tsv
humann2_split_stratified_table --input humann2_genefamilies_relab.tsv --output ./
pip install humann
pip install metaphlan

humann_databases --download chocophlan full humann_databases/humann3_databases/ --update-config yes
humann_databases --download uniref uniref90_diamond humann_databases/humann3_databases/ --update-config yes
humann_databases --download uniref uniref50_diamond humann_databases/humann3_databases/ --update-config yes
humann_databases --download utility_mapping full humann_databases/humann3_databases/ --update-config yes

wget https://github.com/bbuchfink/diamond/releases/download/v0.9.24/diamond-linux64.tar.gz
tar xvf diamond-linux64.tar.gz

parallel -j 1 'humann --input {} --output human_metagenome/humann3_out/ --threads 12' ::: human_metagenome/cat_reads/*.fastq

merge_metaphlan_tables.py humann_final_out/bugs_lists/*tsv > humann3_final_out/metaphlan_merged.tsv
rm -r humann3_final_out/bugs_lists

humann_join_tables -s --input humann3_out/ --file_name pathabundance --output humann3_final_out/humann3_pathabundance.tsv
humann_join_tables -s --input humann3_out/ --file_name pathcoverage --output humann3_final_out/humann3_pathcoverage.tsv
humann_join_tables -s --input humann3_out/ --file_name genefamilies --output humann3_final_out/humann3_genefamilies.tsv

humann_renorm_table --input humann3_pathabundance.tsv --units relab --output humann3_pathabundance_relab.tsv
humann_split_stratified_table --input humann3_pathabundance_relab.tsv --output ./

humann_renorm_table --input humann3_genefamilies.tsv --units relab --output humann3_genefamilies_relab.tsv
humann_split_stratified_table --input humann3_genefamilies_relab.tsv --output ./

find . -name \*aligned.sam -type f -delete
find . -name \*unaligned.fa -type f -delete
find . -name \*aligned.tsv -type f -delete

humann_renorm_table --input humann3_genefamilies.tsv --output humann3_genefamilies_cpm.tsv --units cpm --update-snames

humann_regroup_table --input humann3_genefamilies_cpm.tsv --output humann3_genefamilies_cpm_ko_50.tsv --groups uniref50_ko
humann_regroup_table --input humann3_genefamilies_cpm.tsv --output humann3_genefamilies_cpm_ko_90.tsv --groups uniref90_ko
#GTDB
wget http://ftp.tue.mpg.de/ebio/projects/struo/GTDB_release89/kraken2/

#RefSeq complete
sudo mount -t ramfs none /scratch/ramdisk/
sudo cp -a $DBNAME /ramdisk
sudo cp -a /home/shared/Kraken2.0.8_Bracken150mer_RefSeqCompleteV93/ /scratch/ramdisk/


#For each database (with and without adding --confidence 0.1:
parallel -j 1 'kraken2 --use-names --threads 10 --db gtdb/kraken2_struo/ --fastq-input {} --report kraken2_out/{/.}.report > kraken2_out/{/.}.kraken' ::: human_metagenome/cat_reads/*.fastq

parallel -j 1 'bracken -d gtdb/kraken2_struo/ -i {} -l S -o {.}.bracken' ::: kraken2_report/*.kreport

MetaPhlAn2 taxonomy nMDS plots

#9
#get the taxonomy file and sort it to strain and genus level
taxa = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann/processing/humann2_final_out_90/metaphlan_merged.tsv', sep='\t', header=0, index_col=0)
tax_names = list(taxa.index.values)
keeping = []
for a in range(len(tax_names)):
    if 't__' in tax_names[a]:
        keeping.append(True)
    elif 'unclassified' in tax_names[a]:
        keeping.append(True)
    else:
        keeping.append(False)
strain = taxa.loc[keeping, :]
strain_names = list(strain.index.values)
strain_dict = {}
for i in range(len(strain_names)):
    strain_dict[strain_names[i]] = strain_names[i].split('|s__')[0].split('|g__')[1]
genus = strain.rename(index=strain_dict)
genus = genus.groupby(by=genus.index, axis=0).sum()
#10
#define the function that calculates the nmds plots
def transform_for_NMDS(df, dist_met='braycurtis'):
    X = df.iloc[0:].values
    y = df.iloc[:,0].values
    seed = np.random.RandomState(seed=3)
    X_true = X
    similarities = distance.cdist(X_true, X_true, dist_met)
    mds = manifold.MDS(n_components=2, max_iter=3000, eps=1e-9, random_state=seed,
                   dissimilarity="precomputed", n_jobs=1)
    #print(similarities)
    pos = mds.fit(similarities).embedding_
    nmds = manifold.MDS(n_components=2, metric=False, max_iter=3000, eps=1e-12,
                        dissimilarity="precomputed", random_state=seed, n_jobs=1,
                        n_init=1)
    npos = nmds.fit_transform(similarities, init=pos)
    # Rescale the data
    pos *= np.sqrt((X_true ** 2).sum()) / np.sqrt((pos ** 2).sum())
    npos *= np.sqrt((X_true ** 2).sum()) / np.sqrt((npos ** 2).sum())
    # Rotate the data
    clf = PCA()
    X_true = clf.fit_transform(X_true)
    pos = clf.fit_transform(pos)
    npos = clf.fit_transform(npos)
    return pos, npos, nmds.stress_
strain_t = strain.transpose()
genus_t = genus.transpose()

handles1 = [Patch(facecolor=colors_dict[color], edgecolor='k', label=color) for color in colors_dict]
handles2 = [Line2D([0], [0], marker=shapes_dict[shape], color='w', label=shape, markerfacecolor='k', markersize=15) for shape in shapes_dict]

Bray-Curtis distance

#11
strain_pos, strain_npos, strain_stress = transform_for_NMDS(strain_t, 'braycurtis')
genus_pos, genus_npos, genus_stress = transform_for_NMDS(genus_t, 'braycurtis')

plt.figure(figsize=(10,5))
ax1, ax2 = plt.subplot(121), plt.subplot(122)
for a in range(len(strain_npos)):
    ax1.scatter(strain_npos[a,0], strain_npos[a,1], marker=shapes[a], color=colors[a], s=100)
    ax2.scatter(genus_npos[a,0], genus_npos[a,1], marker=shapes[a],  color=colors[a], s=100)
ax1.set_xlabel('nMDS1')
ax2.set_xlabel('nMDS1')
ax1.set_ylabel('nMDS2')
ax1.set_title('Strain')
ax2.set_title('Genus')

ax2.legend(handles=handles1+handles2, bbox_to_anchor=(1,1))
plt.tight_layout()
plt.show()

Euclidean distance

#12
strain_pos, strain_npos, strain_stress = transform_for_NMDS(strain_t, 'euclidean')
genus_pos, genus_npos, genus_stress = transform_for_NMDS(genus_t, 'euclidean')

plt.figure(figsize=(10,5))
ax1, ax2 = plt.subplot(121), plt.subplot(122)
for a in range(len(strain_npos)):
    ax1.scatter(strain_npos[a,0], strain_npos[a,1], marker=shapes[a], color=colors[a], s=100)
    ax2.scatter(genus_npos[a,0], genus_npos[a,1], marker=shapes[a],  color=colors[a], s=100)
ax1.set_xlabel('nMDS1')
ax2.set_xlabel('nMDS1')
ax1.set_ylabel('nMDS2')
ax1.set_title('Strain')
ax2.set_title('Genus')

ax2.legend(handles=handles1+handles2, bbox_to_anchor=(1.4,1))
plt.tight_layout()
plt.show()

Jaccard distance

#13
strain_pos, strain_npos, strain_stress = transform_for_NMDS(strain_t, 'jaccard')
genus_pos, genus_npos, genus_stress = transform_for_NMDS(genus_t, 'jaccard')

plt.figure(figsize=(10,5))
ax1, ax2 = plt.subplot(121), plt.subplot(122)
for a in range(len(strain_npos)):
    ax1.scatter(strain_npos[a,0], strain_npos[a,1], marker=shapes[a], color=colors[a], s=100)
    ax2.scatter(genus_npos[a,0], genus_npos[a,1], marker=shapes[a],  color=colors[a], s=100)
ax1.set_xlabel('nMDS1')
ax2.set_xlabel('nMDS1')
ax1.set_ylabel('nMDS2')
ax1.set_title('Strain')
ax2.set_title('Genus')

ax2.legend(handles=handles1+handles2, bbox_to_anchor=(1.4,1))
plt.tight_layout()
plt.show()

MetaPhlAn2 taxonomy relative abundance

Kingdom

Here the relative abundance of taxa calulated by MetaPhlAn2 are plotted at the Kingdom level for each sample.

#14
plt.figure(figsize=(7,5))
ax1 = plt.subplot(111)
plt.bar(list(taxa.columns.values), taxa.loc['k__Viruses', :].values, color='#C70039', edgecolor='k')
plt.bar(list(taxa.columns.values), taxa.loc['k__Bacteria', :].values, bottom=taxa.loc['k__Viruses', :].values, color='#026B81', edgecolor='k')
#plt.xticks(list(taxa.columns.values), sample_names, rotation=90)
empty = []
for x in range(0,21):
    empty.append('')
    ax1.text(x, -2, sample_names[x], color=colors[x], rotation=90, va='top', ha='center')
plt.xticks(range(0, 21), empty)
plt.xlim([-0.5,20.5])
handles = [Patch(facecolor='#C70039', edgecolor='k', label='Viruses'), Patch(facecolor='#026B81', edgecolor='k', label='Bacteria')]
plt.legend(handles=handles, bbox_to_anchor=(1,1.05))
plt.tight_layout()
plt.show()

Genus

Here the relative abundance of taxa calulated by MetaPhlAn2 are plotted at the Genus level for each sample. Genera with below 1% maximum relative abundance have been removed.

#15
genus = genus[genus.max(axis=1) > 1]
genera = list(genus.index.values)
plt.figure(figsize=(10,5))
ax1 = plt.subplot(111)
gen_colors = get_cols(len(genus.index.values))
handles = []
for g in range(len(genera)):
    this_gen = genus.loc[genera[g], :].values
    if g == 0:
        ax1.bar(list(genus.columns.values), this_gen, color=gen_colors[g], edgecolor='k')
        total = this_gen
    else:
        ax1.bar(list(genus.columns.values), this_gen, bottom=total, color=gen_colors[g], edgecolor='k')
        total = this_gen+total
    handles.append(Patch(facecolor=gen_colors[g], edgecolor='k', label=genera[g]))
empty = []
for x in range(0,21):
    empty.append('')
    ax1.text(x, -2, sample_names[x], color=colors[x], rotation=90, va='top', ha='center')
plt.xticks(range(0, 21), empty)
plt.xlim([-0.5,20.5])
plt.legend(handles=handles, bbox_to_anchor=(1,1.05), ncol=2)
plt.tight_layout()
plt.show()

MetaPhlAn3 taxonomy nMDS plots

#get the taxonomy files and sort them to strain and genus level
files = sorted(os.listdir('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann3/humann3_final_out/bugs_lists'))
all_genus, samples, all_strain = [], [], []
for f in files:
  #if f != 'SRR8595490_metaphlan_bugs_list.tsv': continue
  samples.append(f.split('_')[0])
  fn = '/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann3/humann3_final_out/bugs_lists/'+f
  taxa = pd.read_csv(fn, header=0, index_col=0, sep='\t')
  taxa.drop(['additional_species', 'NCBI_tax_id'], axis=1, inplace=True)
  taxa.rename(columns={'relative_abundance':f.split('_')[0]}, inplace=True)
  if taxa.shape[0] == 1:
    all_genus.append(taxa)
    all_strain.append(taxa)
    continue
  tax_names = list(taxa.index.values)
  keeping = []
  unclassified, ind = [], []
  for a in range(len(tax_names)):
    if 's__' in tax_names[a]:
        keeping.append(True)
    elif 'UNKNOWN' in tax_names[a]:
        keeping.append(True)
    elif 'unclassified' in tax_names[a].split('__')[-1]:
        keeping.append(True)
        unclassified.append(tax_names[a])
        ind.append(a)
    else:
        keeping.append(False)
  for a in range(len(unclassified)):
    for b in range(len(unclassified)):
      if unclassified[a] in unclassified[b]:
        keeping[ind[a]] = False
  strain = taxa.loc[keeping, :]
  all_strain.append(strain)
  strain_names = list(strain.index.values)
  strain_dict = {}
  for i in range(len(strain_names)):
    if 's__' in strain_names[i]:
      strain_dict[strain_names[i]] = strain_names[i].split('|s__')[0].split('|g__')[1].replace('_', ' ')
    else:
      strain_dict[strain_names[i]] = strain_names[i].split('__')[1].replace('_', ' ')
  genus = strain.rename(index=strain_dict)
  genus = genus.groupby(by=genus.index, axis=0).sum()
  all_genus.append(genus)
all_genus = pd.concat(all_genus).fillna(value=0)
all_genus = all_genus.groupby(by=all_genus.index, axis=0).sum()
all_strain = pd.concat(all_strain).fillna(value=0)
all_strain = all_strain.groupby(by=all_strain.index, axis=0).sum()
strain_t = all_strain.transpose()
genus_t = all_genus.transpose()

handles1 = [Patch(facecolor=colors_dict[color], edgecolor='k', label=color) for color in colors_dict]
handles2 = [Line2D([0], [0], marker=shapes_dict[shape], color='w', label=shape, markerfacecolor='k', markersize=15) for shape in shapes_dict]


Bray-Curtis distance

strain_pos, strain_npos, strain_stress = transform_for_NMDS(strain_t, 'braycurtis')
genus_pos, genus_npos, genus_stress = transform_for_NMDS(genus_t, 'braycurtis')

plt.figure(figsize=(10,5))
ax1, ax2 = plt.subplot(121), plt.subplot(122)
for a in range(len(strain_npos)):
    ax1.scatter(strain_npos[a,0], strain_npos[a,1], marker=shapes[a], color=colors[a], s=100)
    ax2.scatter(genus_npos[a,0], genus_npos[a,1], marker=shapes[a],  color=colors[a], s=100)
ax1.set_xlabel('nMDS1')
ax2.set_xlabel('nMDS1')
ax1.set_ylabel('nMDS2')
ax1.set_title('Strain')
ax2.set_title('Genus')

ax2.legend(handles=handles1+handles2, bbox_to_anchor=(1,1))
plt.tight_layout()
plt.show()


Euclidean distance

strain_pos, strain_npos, strain_stress = transform_for_NMDS(strain_t, 'euclidean')
genus_pos, genus_npos, genus_stress = transform_for_NMDS(genus_t, 'euclidean')

plt.figure(figsize=(10,5))
ax1, ax2 = plt.subplot(121), plt.subplot(122)
for a in range(len(strain_npos)):
    ax1.scatter(strain_npos[a,0], strain_npos[a,1], marker=shapes[a], color=colors[a], s=100)
    ax2.scatter(genus_npos[a,0], genus_npos[a,1], marker=shapes[a],  color=colors[a], s=100)
ax1.set_xlabel('nMDS1')
ax2.set_xlabel('nMDS1')
ax1.set_ylabel('nMDS2')
ax1.set_title('Strain')
ax2.set_title('Genus')

ax2.legend(handles=handles1+handles2, bbox_to_anchor=(1.4,1))
plt.tight_layout()
plt.show()


Jaccard distance

strain_pos, strain_npos, strain_stress = transform_for_NMDS(strain_t, 'jaccard')
genus_pos, genus_npos, genus_stress = transform_for_NMDS(genus_t, 'jaccard')

plt.figure(figsize=(10,5))
ax1, ax2 = plt.subplot(121), plt.subplot(122)
for a in range(len(strain_npos)):
    ax1.scatter(strain_npos[a,0], strain_npos[a,1], marker=shapes[a], color=colors[a], s=100)
    ax2.scatter(genus_npos[a,0], genus_npos[a,1], marker=shapes[a],  color=colors[a], s=100)
ax1.set_xlabel('nMDS1')
ax2.set_xlabel('nMDS1')
ax1.set_ylabel('nMDS2')
ax1.set_title('Strain')
ax2.set_title('Genus')

ax2.legend(handles=handles1+handles2, bbox_to_anchor=(1.4,1))
plt.tight_layout()
plt.show()


MetaPhlAn3 taxonomy relative abundance


Kingdom

I haven’t included this plot for MetaPhlAn3 as all taxa (that are classified) are classified as bacteria. There are now no viruses in the output, but those samples that were all viral reads in MetaPhlAn2 are now all unclassified/‘unknown’.


Genus

Here the relative abundance of taxa calulated by MetaPhlAn3 are plotted at the Genus level for each sample. Genera with below 1% maximum relative abundance have been removed.

genus = all_genus[all_genus.max(axis=1) > 1]
genera = list(genus.index.values)
plt.figure(figsize=(10,5))
ax1 = plt.subplot(111)
gen_colors = get_cols(len(genus.index.values))
handles = []
for g in range(len(genera)):
    this_gen = genus.loc[genera[g], :].values
    if g == 0:
        ax1.bar(list(genus.columns.values), this_gen, color=gen_colors[g], edgecolor='k')
        total = this_gen
    else:
        ax1.bar(list(genus.columns.values), this_gen, bottom=total, color=gen_colors[g], edgecolor='k')
        total = this_gen+total
    if 'XIII' in genera[g]:
      genera[g] = genera[g].replace('XIII', 'XIII\n')
    handles.append(Patch(facecolor=gen_colors[g], edgecolor='k', label=genera[g]))
empty = []
for x in range(0,21):
    empty.append('')
    ax1.text(x, -2, sample_names[x], color=colors[x], rotation=90, va='top', ha='center')
plt.xticks(range(0, 21), empty)
plt.xlim([-0.5,20.5])
plt.legend(handles=handles, bbox_to_anchor=(1,1.05), ncol=2)
plt.tight_layout()
plt.show()


Kraken2 reads classified

#16
#get all samples into dataframes based on the database that they use
folders = sorted(os.listdir('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2'))
del folders[0]
kraken_columns = {0:'Percent fragments clade', 1:'Number fragments clade', 2:'Number fragments taxon', 3:'Level', 4:'NCBI ID', 5:'Taxon name'}
kraken_all_db, bracken_all_db, all_domains = [], [], {}
for fol in folders:
    if fol == 'db_genera' or fol == 'db_genera_in_common':
      continue
    if not os.path.isdir('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2/'+fol):
        continue
    bracken, kraken, bracken_kreport = [], [], []
    bracken_pd, kraken_pd = [], []
    for fi in sorted(os.listdir('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2/'+fol)):
        if fi[-7:] == 'bracken':
            bracken.append(fi)
        elif fi[-7:] == 'kreport' and 'bracken' not in fi:
            kraken.append(fi)
        elif fi[-7:] == 'kreport':
            bracken_kreport.append(fi)
    for bk in bracken_kreport:
        with open('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2/'+fol+'/'+bk, 'rU') as f:
            bk = []
            domains = {}
            this_domain, domain_name = [], ''
            for row in csv.reader(f, delimiter='\t'):
                bk.append(row)
                row[5] = row[5].lstrip()
                if row[3] == 'D':
                    if domain_name != '':
                        domains[domain_name] = this_domain
                    this_domain, domain_name = [], row[5]
                else:
                    if row[3] != 'R' and row[3] != 'U' and 'D' not in row[3]:
                        this_domain.append(row[5])
            domains[domain_name] = this_domain
            for domain in domains:
                if domain in all_domains:
                    all_domains[domain] = list(set(all_domains[domain]+domains[domain]))
                else:
                    all_domains[domain] = list(set(domains[domain]))
    for b in bracken:
        if len(b) > 22:
            continue
        sample = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2/'+fol+'/'+b, sep='\t', header=0, index_col=0)
        b = b.replace('_150', '')
        sample.drop(['taxonomy_id', 'taxonomy_lvl', 'kraken_assigned_reads', 'added_reads', 'fraction_total_reads'], axis=1, inplace=True)
        sample.rename(columns={'new_est_reads':b[:-8]}, inplace=True)
        bracken_pd.append(sample)
    for k in kraken:
        sample = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2/'+fol+'/'+k, sep='\t', header=None, index_col=3)
        sample = sample.loc[['U', 'D'], :]
        sample = sample.rename(columns=kraken_columns).drop(['Number fragments taxon', 'NCBI ID'], axis=1).rename(columns={'Percent fragments clade':k[:-8]+'_percent', 'Number fragments clade':k[:-8]+'_reads'}).set_index('Taxon name')
        taxa = list(sample.index.values)
        taxa_dict = {}
        for t in taxa:
            taxa_dict[t] = t.replace(' ', '')
        sample = sample.rename(index=taxa_dict)
        kraken_pd.append(sample)
    bracken = pd.concat(bracken_pd, join='outer')
    kraken = pd.concat(kraken_pd, join='outer')
    kraken = kraken.rename(index={'d__Bacteria':'Bacteria', 'd__Archaea':'Archaea'})
    kraken = kraken.groupby(by=kraken.index, axis=0).sum()
    bracken = bracken.groupby(by=bracken.index, axis=0).sum().fillna(value=0)
    kraken_all_db.append(kraken), bracken_all_db.append(bracken)
#17
x1 = [x for x in range(21)]
x2 = [x+0.3 for x in range(21)]
tax_plotting = ['Archaea', 'Bacteria', 'Eukaryota', 'Viruses', 'unclassified']
color_plotting = ['#EDBB99', '#5499C7', '#7DCEA0', '#F7DC6F', '#CCD1D1']
tax_paper = ['Bacteria', 'Eukaryota', 'Other', 'Unclassified']
color_paper = ['#5499C7', '#7DCEA0', '#CD6155', '#CCD1D1']
from_paper = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2/from_paper.csv', header=0, index_col=0)

def get_summary_reads(kraken_db):
    fig = plt.figure(figsize=(15,15))
    ax1, ax2, ax3, ax4, ax5 = plt.subplot(321), plt.subplot(322), plt.subplot(323), plt.subplot(324), plt.subplot(325)
    ax1.set_title('Minikraken V1\n(without human genome)'), ax2.set_title('Minikraken V2\n(with human genome)'), ax3.set_title('GTDB'), ax4.set_title('RefSeq Complete V93')
    ax5.set_title('From paper')
    ax_plot = [ax3, ax3, ax1, ax1, ax2, ax2, ax4, ax4]
    x_plot = [x1, x2, x1, x2, x1, x2, x1, x2]
    axs = [ax1, ax2, ax3, ax4, ax5]
    for s in range(len(samples)):
        if s == 0:
            continue
        bottom = 0
        for t in range(len(tax_paper)):
            ax5.bar(x1[s], from_paper.loc[tax_paper[t], samples[s]], bottom=bottom, color=color_paper[t], edgecolor='k', width=0.6)
            bottom += from_paper.loc[tax_paper[t], samples[s]]
    for db in range(len(kraken_db)):
        ax_using = ax_plot[db]
        x = x_plot[db]
        db = kraken_db[db]
        handles = []
        for tax in range(len(tax_plotting)):
            handles.append(Patch(facecolor=color_plotting[tax], edgecolor='k', label=tax_plotting[tax]))
            tax = tax_plotting[tax]
            if tax not in list(db.index.values):
                db.loc[tax] = [0 for i in range(db.shape[1])]
        handles.append(Patch(facecolor=color_paper[2], edgecolor='k', label='Other'))
        db = db.fillna(value=0)
        for s in range(len(samples)):
            bottom = 0
            for t in range(len(tax_plotting)):
                prop = db.loc[tax_plotting[t], samples[s]+'_reads']
                cat = total_reads.loc[samples[s], 'cat_reads']
                prop = (prop/cat)*100
                ax_using.bar(x[s], prop, bottom=bottom, color=color_plotting[t], edgecolor='k', width=0.3)
                bottom += prop
    ax2.legend(handles=handles, bbox_to_anchor=(1,1.05))
    for ax in axs:
        plt.sca(ax)
        plt.xticks(x1, ['' for x in x1])
        plt.ylim([0, 100])
        plt.xlim([-0.5, 20.5])
    for x in x1:
        ax5.text(x, -2, sample_names[x], color=colors[x], rotation=90, va='top', ha='center')
        ax4.text(x, -2, sample_names[x], color=colors[x], rotation=90, va='top', ha='center')
    ax1.set_ylabel('Classified (%)'), ax3.set_ylabel('Classified (%)'), ax5.set_ylabel('Classified(%)')
    #plt.tight_layout()
    return

def get_summary_bacteria(kraken_db):
    tax_plotting = ['Bacteria']
    alpha = ['#5499C7', '#F1C40F', '#5499C7', '#F1C40F', '#5499C7', '#F1C40F', '#5499C7', '#F1C40F']
    
    fig = plt.figure(figsize=(10,8))
    ax1, ax2, ax3, ax4 = plt.subplot(221), plt.subplot(222), plt.subplot(223), plt.subplot(224)
    ax1.set_title('Minikraken V1\n(without human genome)'), ax2.set_title('Minikraken V2\n(with human genome)'), ax3.set_title('GTDB'), ax4.set_title('RefSeq Complete V93')
    ax_plot = [ax3, ax3, ax1, ax1, ax2, ax2, ax4, ax4]
    x_plot = [x1, x2, x1, x2, x1, x2, x1, x2]
    axs = [ax1, ax2, ax3, ax4]
    
    for db in range(len(kraken_db)):
        ax_using = ax_plot[db]
        x = x_plot[db]
        alp = alpha[db]
        db = kraken_db[db]
        for tax in range(len(tax_plotting)):
            tax = tax_plotting[tax]
            if tax not in list(db.index.values):
                db.loc[tax] = [0 for i in range(db.shape[1])]
        db = db.fillna(value=0)
        for s in range(len(samples)):
            bottom = 0
            for t in range(len(tax_plotting)):
                prop = db.loc[tax_plotting[t], samples[s]+'_reads']
                cat = total_reads.loc[samples[s], 'cat_reads']
                ax_using.bar(x[s], prop, bottom=bottom, color=alp, edgecolor='k', width=0.3)
                bottom += prop
    handles = []
    handles.append(Patch(facecolor=alpha[0], edgecolor='k', label='No confidence value'))
    handles.append(Patch(facecolor=alpha[1], edgecolor='k', label='Confidence=0.1'))
    ax2.legend(handles=handles, bbox_to_anchor=(1.6,1.03))
    for ax in axs:
        plt.sca(ax)
        plt.xticks(x1, ['' for x in x1])
        plt.semilogy()
        plt.xlim([-0.5, 20.5])
        #plt.ylim([0, 100])
        plt.xlim([-0.5, 20.5])
    for x in x1:
        pl = ((1/21)*(x+1))-0.02
        ax3.text(pl, -0.03, sample_names[x], color=colors[x], rotation=90, va='top', ha='center', transform=ax3.transAxes)
        ax4.text(pl, -0.03, sample_names[x], color=colors[x], rotation=90, va='top', ha='center', transform=ax4.transAxes)
    ax1.set_ylabel('Number of reads'), ax3.set_ylabel('Number of reads')
    #plt.tight_layout()
    return

Percent classified

A summary of the percentage of reads classified as different domains with different databases. Note that the ‘From paper’ plot uses the classifications given in the original paper, where 10,000 unmapped reads were classified using BLAST searches of the NCBI database.

#18
get_summary_reads(kraken_all_db)
plt.show()

Summary of number of bacteria

Summary of the number of reads that are classified as bacteria by each database.

#19
get_summary_bacteria(kraken_all_db)
plt.tight_layout()
plt.show()

Kraken2 taxonomy nMDS plots

These first plots are all separately with the confidence parameter set. See the last tab for those without the confidence parameter set.

#20
db_names = ['gtdb', 'gtdb_conf', 'minikraken', 'minikraken_conf', 'minikraken_human', 'minikraken_human_conf', 'refseq', 'refseq_conf']
bacteria = all_domains['Bacteria']+all_domains['d__Bacteria']
genera, gen_names, genera_1, gen_names_1, strain, gen_sums = [], [], [], [], [], []

for db in range(len(bracken_all_db)):
    d = int(db)
    db = bracken_all_db[db]
    species = list(db.index.values)
    keeping = []
    species_dict = {}
    for sp in species:
        if sp in bacteria:
            keeping.append(True)
            new_sp = sp.split('__')
            if len(new_sp) > 1:
                new_sp = new_sp[1]
            else:
                new_sp = new_sp[0]
            species_dict[sp] = new_sp.split(' ')[0].replace("'", '')
        else:
            keeping.append(False)
    in_len = db.shape[0]
    db = db.loc[keeping, :]
    strain.append(db)
    db = db.rename(index=species_dict)
    db = db.groupby(by=db.index, axis=0).sum()
    sums = db.sum(axis=0)
    gen_sums.append(sums)
    db = db.divide(sums, axis=1).multiply(100)
    genera.append(db)
    db.to_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2/db_genera/'+db_names[d]+'.csv')
    gen_names = gen_names+list(db.index.values)
    db = db[db.max(axis=1) > 1]
    genera_1.append(db)
    gen_names_1 = gen_names_1+list(db.index.values)
gen_names = list(set(gen_names))
gen_names_1 = list(set(gen_names_1))
#21
def plot_four_nmds(dbs, metric, name):
    fig = plt.figure(figsize=(15,10))
    #fig.suptitle(name+metric+'\n\n\n')
    ax1, ax2, ax3, ax4 = plt.subplot(221), plt.subplot(222), plt.subplot(223), plt.subplot(224)
    axs = [ax3, ax1, ax2, ax4]
    ax1.set_title('Minikraken V1\n(without human genome)'), ax2.set_title('Minikraken V2\n(with human genome)'), ax3.set_title('GTDB'), ax4.set_title('RefSeq Complete V93')
    
    for db in range(len(dbs)):
        n = db
        db = dbs[db].transpose()
        pos, npos, stress = transform_for_NMDS(db, metric)
        for a in range(len(npos)):
            axs[n].scatter(npos[a,0], npos[a,1], marker=shapes[a], color=colors[a], s=100, edgecolor='k')
        axs[n].set_xlabel('nMDS1')
        axs[n].set_ylabel('nMDS2')
    handles1 = [Patch(facecolor=colors_dict[color], edgecolor='k', label=color) for color in colors_dict]
    handles2 = [Line2D([0], [0], marker=shapes_dict[shape], color='w', label=shape, markerfacecolor='k', markersize=15) for shape in shapes_dict]

    ax2.legend(handles=handles1+handles2, bbox_to_anchor=(1,1))
    plt.tight_layout()
    return

Bray-Curtis distance strain level

#22
plot_four_nmds([strain[1], strain[3], strain[5], strain[7]], 'braycurtis', 'NMDS confidence=0.1 strain ')
plt.show()

Bray-Curtis distance genus level

#23
plot_four_nmds([genera[1], genera[3], genera[5], genera[7]], 'braycurtis', 'NMDS confidence=0.1 genera ')
plt.show()

Euclidean distance strain level

#24
plot_four_nmds([strain[1], strain[3], strain[5], strain[7]], 'euclidean', 'NMDS confidence=0.1 strain ')
plt.show()

Euclidean distance genus level

#25
plot_four_nmds([genera[1], genera[3], genera[5], genera[7]], 'euclidean', 'NMDS confidence=0.1 genera ')
plt.show()

Jaccard distance strain level

#26
plot_four_nmds([strain[1], strain[3], strain[5], strain[7]], 'jaccard', 'NMDS confidence=0.1 strain ')
plt.show()

Jaccard distance genus level

#27
plot_four_nmds([genera[1], genera[3], genera[5], genera[7]], 'jaccard', 'NMDS confidence=0.1 genera ')
plt.show()

All plots with no confidence parameter set

Bray-curtis distance at strain level

#28
plot_four_nmds([strain[0], strain[2], strain[4], strain[6]], 'braycurtis', 'NMDS no confidence strain ')
plt.show()

Bray-curtis distance at genus level

#29
plot_four_nmds([genera[0], genera[2], genera[4], genera[6]], 'braycurtis', 'NMDS no confidence genera ')
plt.show()

Euclidean distance at strain level

#30
plot_four_nmds([strain[0], strain[2], strain[4], strain[6]], 'euclidean', 'NMDS no confidence strain ')
plt.show()

Euclidean distance at genus level

#31
plot_four_nmds([genera[0], genera[2], genera[4], genera[6]], 'euclidean', 'NMDS no confidence genera ')
plt.show()

Jaccard distance at strain level

#32
plot_four_nmds([strain[0], strain[2], strain[4], strain[6]], 'jaccard', 'NMDS no confidence strain ')
plt.show()

Jaccard distance at genus level

#33
plot_four_nmds([genera[0], genera[2], genera[4], genera[6]], 'jaccard', 'NMDS no confidence genera ')
plt.show()

Kraken2 taxonomy relative abundance

These plots are now only calculated for the classifications that used confidence = 0.1. Genera with below 1% maximum relative abundance are removed and the numbers in brackets are the number of reads that were classified as bacteria.

#34
db_names = ['gtdb', 'gtdb_conf', 'minikraken', 'minikraken_conf', 'minikraken_human', 'minikraken_human_conf', 'refseq', 'refseq_conf']
#bacteria = all_domains['Bacteria']+all_domains['d__Bacteria']
#genera, gen_names, genera_1, gen_names_1, strain, gen_sums = [], [], [], [], [], []
gen_names_1 = sorted(gen_names_1)
def plot_genera(db, sums, tax_cols, gen_names_1, dname):
    plt.figure(figsize=(10,5))
    ax1 = plt.subplot(111)
    bottom = [0 for x in range(len(db.columns))]
    handles = []
    for g in range(len(gen_names_1)):
        if gen_names_1[g] in db.index.values:
            this_row = db.loc[gen_names_1[g], :].values
            ax1.bar(x1, this_row, bottom=bottom, color=tax_cols[g], edgecolor='k')
            handles.append(Patch(facecolor=tax_cols[g], edgecolor='k', label=gen_names_1[g]))
            for b in range(len(bottom)):
                bottom[b] += this_row[b]
    ax1.legend(handles=handles, bbox_to_anchor=(1, 1.03), ncol=3)
    plt.xticks(x1, ['' for x in x1])
    plt.ylabel('Relative abundance(%)')
    plt.xlim([-0.5, 20.5])
    plt.ylim([0, 100])
    for x in x1:
        n = str(int(sums[samples[x]]))
        ax1.text(x, -2, sample_names[x]+' ('+n+')', color=colors[x], rotation=90, va='top', ha='center')
    plt.tight_layout()
    return

gen_plot = [genera_1[1], genera_1[3], genera_1[5], genera_1[7]]
db_name = ['GTDB', 'Minikraken V1', 'Minikraken V2', 'RefSeq Complete V93']
all_sums = [gen_sums[1], gen_sums[3], gen_sums[5], gen_sums[7]]
tax_cols = get_cols(len(gen_names_1))

Minikraken v1

#35
plot_genera(gen_plot[1], all_sums[1], tax_cols, gen_names_1, db_name[1])
plt.show()

Minikraken v2

#36
plot_genera(gen_plot[2], all_sums[2], tax_cols, gen_names_1, db_name[2])
plt.show()

GTDB

#37
plot_genera(gen_plot[0], all_sums[0], tax_cols, gen_names_1, db_name[0])
plt.show()

RefSeq Complete v93

#38
plot_genera(gen_plot[3], all_sums[3], tax_cols, gen_names_1, db_name[3])
plt.show()

Kraken2 similarities and differences between body sites

Here I have carried out ANCOM2 tests for differential abundance of genera between body sites. All genera are then plotted on a heatmap with mean values for each body site and stars to denote significant differences as determined by ANCOM.

While many of these results vary between databases, it is worth noting that some differences are consistent, e.g.:
1. Prevotella are always more abundant in saliva samples
2. Streptococcus are always more abundant in buccal samples
3. Klebsiella (where present) are always more abundant in blood samples

#39
def get_differences(new_genus, db_name):
    new_genus = new_genus.drop(['SRR8595488'], axis=1)
    new_genus = new_genus[new_genus.max(axis=1) > 0.1]
    samples = list(new_genus.columns)
    
    list_comparisons = [['Blood', 'Saliva'], ['Blood', 'Buccal'], ['Saliva', 'Buccal'], ['Saliva', 'Saliva_euk'], ['Buccal', 'Buccal_euk'], ['Blood', 'Saliva_euk'], ['Blood', 'Buccal_euk'], ['Saliva_euk', 'Buccal_euk']]
    comparisons, metadata, comp_len = [], [], []
    for a in range(len(list_comparisons)):
        keeping = []
        this_md = []
        for b in range(len(samples)):
            if site_dict[samples[b]] in list_comparisons[a]:
                keeping.append(True)
                this_md.append([samples[b], site_dict[samples[b]]])
            else:
                keeping.append(False)
        this_comp = new_genus.loc[:, keeping]
        this_comp = this_comp[this_comp.max(axis=1) > 0.1]
        comparisons.append(this_comp)
        this_md = pd.DataFrame(this_md, columns=['Samples', 'Groups'])
        #this_md.set_index('Samples', inplace=True)
        metadata.append(this_md)
        comp_len.append(this_comp.shape[0])
    new_genus = new_genus.rename(columns=site_dict, inplace=False)
    return comparisons, metadata, new_genus, comp_len
db_name = ['GTDB', 'Minikraken V1', 'Minikraken V2', 'RefSeq Complete V93']
comparison_names = [r'Blood vs saliva', r'Blood vs buccal', r'Saliva vs buccal', r'Saliva vs saliva_euk', r'Buccal vs buccal_euk', r'Blood vs saliva_euk', r'Blood vs buccal_euk', r'Saliva_euk vs buccal_euk']
source("/Users/robynwright/Documents/OneDrive/Github/R-notebooks/ancom_v2.1.R")

get_ancom <- function(ft, md) {
all_ancom = list()
for (a in 1:8){
  feature_table = ft[a]
  meta_data = md[a]
  process = feature_table_pre_process(feature_table, meta_data, 'Samples', 'Groups', lib_cut=10, neg_lb=TRUE)
  ancom_out = ANCOM(process$feature_table, process$meta_data, process$struc_zero, main_var='Groups')
  all_ancom[[a]] <- ancom_out$out
}
return(all_ancom)
}
def sort_ancom_results(r_ancom):
  ancom_lists_09, ancom_lists_08, ancom_lists_07, ancom_lists_06 = [], [], [], []
  for a in range(len(r_ancom)):
    this_sig_09, this_sig_08, this_sig_07, this_sig_06 = [], [], [], []
    r_ancom[a].set_index('taxa_id', inplace=True)
    all_sp = list(r_ancom[a].index.values)
    for b in range(len(all_sp)):
      if r_ancom[a].loc[all_sp[b], 'detected_0.9'] == True:
        this_sig_09.append(all_sp[b])
      if r_ancom[a].loc[all_sp[b], 'detected_0.8'] == True:
        this_sig_08.append(all_sp[b])
      if r_ancom[a].loc[all_sp[b], 'detected_0.7'] == True:
        this_sig_07.append(all_sp[b])
      if r_ancom[a].loc[all_sp[b], 'detected_0.6'] == True:
        this_sig_06.append(all_sp[b])
    ancom_lists_09.append(this_sig_09), ancom_lists_08.append(this_sig_08), ancom_lists_07.append(this_sig_07), ancom_lists_06.append(this_sig_06)
  return [ancom_lists_09, ancom_lists_08, ancom_lists_07, ancom_lists_06]
def plot_heatmap(new_genus, ANCOM):
    print(ANCOM)
    names = ['Blood', 'Saliva', 'Buccal', 'Saliva_euk', 'Buccal_euk']
    other_names = ['Abundance', r'Blood $vs$ saliva', r'Blood $vs$ buccal', r'Saliva $vs$ buccal', r'Saliva $vs$ saliva_euk', r'Buccal $vs$ buccal_euk', r'Blood $vs$ saliva_euk', r'Blood $vs$ buccal_euk', r'Saliva_euk $vs$ buccal_euk']
    colormap, norm = mpl.cm.get_cmap('plasma', 256), mpl.colors.Normalize(vmin=0, vmax=1)
    m = mpl.cm.ScalarMappable(norm=norm, cmap=colormap)
    if list(new_genus.index.values)[0][0] == 'A':
        new_genus = new_genus.iloc[::-1]
    figure = plt.figure(figsize=(5,new_genus.shape[0]*0.2))
    ax1 = plt.subplot(111)
    genus_names = list(new_genus.index.values)
    y = []
    for g in range(len(genus_names)):
        this_row = new_genus.loc[genus_names[g], :]
        values = [(np.mean(this_row[name].values)) for name in names]
        bottom, top, x = [g for a in range(5)], [1 for a in range(5)], [a for a in range(5)]
        y.append(g+0.5)
        ma = max(values)
        values = [v/ma for v in values]
        values = [m.to_rgba(v) for v in values]
        ax1.bar(x, top, bottom=bottom, color=values, edgecolor='k', width=1)
        x.append(5)
        ax1.scatter(x[-1], bottom[-1]+0.5, color='k', s=ma*5)
        sig, x_plt = [], x[-1]
        for a in range(len(ANCOM)):
            x_plt += 0.75
            if genus_names[g] in ANCOM[a]: ax1.scatter(x_plt, bottom[-1]+0.5, marker='*', color='k')
            x.append(x_plt)
    plt.ylim([0, len(genus_names)]), plt.xlim([-0.5, x[-1]+0.5])
    plt.yticks(y, genus_names)
    plt.xticks(x, names+other_names, rotation=90)
    ax1.xaxis.set_ticks_position('top')
    plt.tight_layout()
    plt.show()
    return

Minikraken v1 ANCOM

These differences are determined by ANCOM2 and the most conservative of these is the ANCOM 0.9 set, which is then used for plotting heatmaps.

this_genera_db = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2/db_genera/minikraken_conf.csv', header=0, index_col=0)
comparisons, md, new_genus, comp_len = get_differences(this_genera_db, db_name[1])
c1, c2, c3, c4, c5, c6, c7, c8 = comparisons[0], comparisons[1], comparisons[2], comparisons[3], comparisons[4], comparisons[5], comparisons[6], comparisons[7]
md1, md2, md3, md4, md5, md6, md7, md8 = md[0], md[1], md[2], md[3], md[4], md[5], md[6], md[7]
ft = list(py$c1, py$c2, py$c3, py$c4, py$c5, py$c6, py$c7, py$c8)
md = list(py$md1, py$md2, py$md3, py$md4, py$md5, py$md6, py$md7, py$md8)
all_ancom = get_ancom(ft, md)
ancom = sort_ancom_results(r.all_ancom)
ancom_df = []
for c in range(len(comparison_names)):
    this_row = [comparison_names[c], comp_len[c], len(ancom[3][c]), len(ancom[2][c]), len(ancom[1][c]), len(ancom[0][c])]
    ancom_df.append(this_row)
ancom_df = pd.DataFrame(ancom_df, columns=['Comparison', 'Shared genera', 'ANCOM 0.6', 'ANCOM 0.7', 'ANCOM 0.8', 'ANCOM 0.9'])
py$ancom_df %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Comparison Shared genera ANCOM 0.6 ANCOM 0.7 ANCOM 0.8 ANCOM 0.9
Blood vs saliva 41 41 37 26 8
Blood vs buccal 37 27 21 15 3
Saliva vs buccal 45 3 3 2 1
Saliva vs saliva_euk 41 0 0 0 0
Buccal vs buccal_euk 39 2 0 0 0
Blood vs saliva_euk 38 32 24 15 3
Blood vs buccal_euk 34 11 5 2 2
Saliva_euk vs buccal_euk 43 5 3 2 1

Minikraken v1 heatmap

Relative abundance is scaled within each genus, with yellow being highest in abundance and purple lowest. Blobs are scaled to maximum relative abundance. Stars denote genera that are significantly differentially abundant between body sites (ANCOM2 0.9).

plot_heatmap(new_genus, ancom[0])

Minikraken v2 ANCOM

These differences are determined by ANCOM2 and the most conservative of these is the ANCOM 0.9 set, which is then used for plotting heatmaps.

this_genera_db = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2/db_genera/minikraken_human_conf.csv', header=0, index_col=0)
comparisons, md, new_genus, comp_len = get_differences(this_genera_db, db_name[1])
c1, c2, c3, c4, c5, c6, c7, c8 = comparisons[0], comparisons[1], comparisons[2], comparisons[3], comparisons[4], comparisons[5], comparisons[6], comparisons[7]
md1, md2, md3, md4, md5, md6, md7, md8 = md[0], md[1], md[2], md[3], md[4], md[5], md[6], md[7]
ft = list(py$c1, py$c2, py$c3, py$c4, py$c5, py$c6, py$c7, py$c8)
md = list(py$md1, py$md2, py$md3, py$md4, py$md5, py$md6, py$md7, py$md8)
all_ancom = get_ancom(ft, md)
ancom = sort_ancom_results(r.all_ancom)
ancom_df = []
for c in range(len(comparison_names)):
    this_row = [comparison_names[c], comp_len[c], len(ancom[3][c]), len(ancom[2][c]), len(ancom[1][c]), len(ancom[0][c])]
    ancom_df.append(this_row)
ancom_df = pd.DataFrame(ancom_df, columns=['Comparison', 'Shared genera', 'ANCOM 0.6', 'ANCOM 0.7', 'ANCOM 0.8', 'ANCOM 0.9'])
py$ancom_df %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Comparison Shared genera ANCOM 0.6 ANCOM 0.7 ANCOM 0.8 ANCOM 0.9
Blood vs saliva 49 48 47 38 9
Blood vs buccal 48 35 16 14 4
Saliva vs buccal 39 5 4 1 0
Saliva vs saliva_euk 30 0 0 0 0
Buccal vs buccal_euk 37 0 0 0 0
Blood vs saliva_euk 49 47 33 22 5
Blood vs buccal_euk 50 19 14 12 4
Saliva_euk vs buccal_euk 42 4 3 3 2

Minikraken v2 heatmap

Relative abundance is scaled within each genus, with yellow being highest in abundance and purple lowest. Blobs are scaled to maximum relative abundance. Stars denote genera that are significantly differentially abundant between body sites (ANCOM2 0.9).

plot_heatmap(new_genus, ancom[0])

GTDB ANCOM

These differences are determined by ANCOM2 and the most conservative of these is the ANCOM 0.9 set, which is then used for plotting heatmaps.

this_genera_db = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2/db_genera/gtdb_conf.csv', header=0, index_col=0)
comparisons, md, new_genus, comp_len = get_differences(this_genera_db, db_name[1])
c1, c2, c3, c4, c5, c6, c7, c8 = comparisons[0], comparisons[1], comparisons[2], comparisons[3], comparisons[4], comparisons[5], comparisons[6], comparisons[7]
md1, md2, md3, md4, md5, md6, md7, md8 = md[0], md[1], md[2], md[3], md[4], md[5], md[6], md[7]
ft = list(py$c1, py$c2, py$c3, py$c4, py$c5, py$c6, py$c7, py$c8)
md = list(py$md1, py$md2, py$md3, py$md4, py$md5, py$md6, py$md7, py$md8)
all_ancom = get_ancom(ft, md)
ancom = sort_ancom_results(r.all_ancom)
ancom_df = []
for c in range(len(comparison_names)):
    this_row = [comparison_names[c], comp_len[c], len(ancom[3][c]), len(ancom[2][c]), len(ancom[1][c]), len(ancom[0][c])]
    ancom_df.append(this_row)
ancom_df = pd.DataFrame(ancom_df, columns=['Comparison', 'Shared genera', 'ANCOM 0.6', 'ANCOM 0.7', 'ANCOM 0.8', 'ANCOM 0.9'])
py$ancom_df %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Comparison Shared genera ANCOM 0.6 ANCOM 0.7 ANCOM 0.8 ANCOM 0.9
Blood vs saliva 98 88 82 32 6
Blood vs buccal 91 21 14 8 5
Saliva vs buccal 96 18 10 2 1
Saliva vs saliva_euk 93 3 2 0 0
Buccal vs buccal_euk 95 8 5 1 0
Blood vs saliva_euk 92 27 17 11 4
Blood vs buccal_euk 75 7 7 3 2
Saliva_euk vs buccal_euk 97 19 13 7 1

GTDB heatmap

Relative abundance is scaled within each genus, with yellow being highest in abundance and purple lowest. Blobs are scaled to maximum relative abundance. Stars denote genera that are significantly differentially abundant between body sites (ANCOM2 0.9).

plot_heatmap(new_genus, ancom[0])

RefSeq Complete v93 ANCOM

These differences are determined by ANCOM2 and the most conservative of these is the ANCOM 0.9 set, which is then used for plotting heatmaps.

this_genera_db = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2/db_genera/refseq_conf.csv', header=0, index_col=0)
comparisons, md, new_genus, comp_len = get_differences(this_genera_db, db_name[1])
c1, c2, c3, c4, c5, c6, c7, c8 = comparisons[0], comparisons[1], comparisons[2], comparisons[3], comparisons[4], comparisons[5], comparisons[6], comparisons[7]
md1, md2, md3, md4, md5, md6, md7, md8 = md[0], md[1], md[2], md[3], md[4], md[5], md[6], md[7]
ft = list(py$c1, py$c2, py$c3, py$c4, py$c5, py$c6, py$c7, py$c8)
md = list(py$md1, py$md2, py$md3, py$md4, py$md5, py$md6, py$md7, py$md8)
all_ancom = get_ancom(ft, md)
ancom = sort_ancom_results(r.all_ancom)
ancom_df = []
for c in range(len(comparison_names)):
    this_row = [comparison_names[c], comp_len[c], len(ancom[3][c]), len(ancom[2][c]), len(ancom[1][c]), len(ancom[0][c])]
    ancom_df.append(this_row)
ancom_df = pd.DataFrame(ancom_df, columns=['Comparison', 'Shared genera', 'ANCOM 0.6', 'ANCOM 0.7', 'ANCOM 0.8', 'ANCOM 0.9'])
py$ancom_df %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Comparison Shared genera ANCOM 0.6 ANCOM 0.7 ANCOM 0.8 ANCOM 0.9
Blood vs saliva 85 81 77 35 11
Blood vs buccal 82 65 36 21 7
Saliva vs buccal 69 9 6 3 0
Saliva vs saliva_euk 64 0 0 0 0
Buccal vs buccal_euk 76 4 4 1 0
Blood vs saliva_euk 86 72 37 20 4
Blood vs buccal_euk 79 18 15 8 2
Saliva_euk vs buccal_euk 84 12 10 5 1

RefSeq Complete v93 heatmap

Relative abundance is scaled within each genus, with yellow being highest in abundance and purple lowest. Blobs are scaled to maximum relative abundance. Stars denote genera that are significantly differentially abundant between body sites (ANCOM2 0.9).

plot_heatmap(new_genus, ancom[0])

Kraken2 intersection between databases

Now we are looking at the taxa (grouped to genus level, to hopefully allow for differences between databases) that are in common between the different databases that have been used with Kraken2. We are also using only the versions that have confidence=0.1.

Genera common to all databases (reads before and after)

The plots below here show the number of reads in each sample before and after removal of the genera that aren’t present in all databases.

#print(bracken_all_db)
def intersection(lst1, lst2): 
    lst3 = [value for value in lst1 if value in lst2] 
    return lst3

conf_bracken_genus = []
genus_in_each = []
for a in range(len(bracken_all_db)):
  if a not in [0, 2, 4, 6]:
    continue
  this_db = pd.DataFrame(bracken_all_db[a])
  taxa = list(this_db.index.values)
  tax_dict = {}
  for b in range(len(taxa)):
    orig_tax = taxa[b]
    if 's__' in taxa[b]:
      taxa[b] = taxa[b].replace('s__', '')
    taxa[b] = taxa[b].split(' ')[0]
    taxa[b] = taxa[b].replace("'", "")
    tax_dict[orig_tax] = taxa[b]
  this_db.rename(index=tax_dict, inplace=True)
  genus = this_db.groupby(by=this_db.index, axis=0).sum()
  conf_bracken_genus.append(genus)
  genus_in_each.append(list(genus.index.values))

[print(len(gen)) for gen in genus_in_each]
overall_genus = intersection(genus_in_each[0], genus_in_each[1])
overall_genus = intersection(overall_genus, genus_in_each[2])
overall_genus = intersection(overall_genus, genus_in_each[3])
print(len(overall_genus))
fig = plt.figure(figsize=(10,6))
ax = [plt.subplot(223), plt.subplot(221), plt.subplot(222), plt.subplot(224)]
x1 = [x for x in range(21)]
x2 = [x+0.4 for x in range(21)]
x3 = [x+0.2 for x in range(21)]

conf_bracken_overall = []
names = ['gtdb', 'minikrakenv1', 'minikrakenv2', 'refseq']
labels = ['GTDB', 'Minikraken v1 (without human)', 'Minikraken v2 (with human)', 'RefSeq Complete v93']
sum_reduced = []
for db in range(len(conf_bracken_genus)):
  new_db = pd.DataFrame(conf_bracken_genus[db].loc[overall_genus, :])
  conf_bracken_overall.append(new_db)
  new_db.to_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2/db_genera_in_common/'+names[db]+'_common.csv')
  conf_bracken_genus[db].to_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2/db_genera_in_common/'+names[db]+'.csv')
  sums = new_db.sum(axis=0)
  sum_reduced.append(sums)
  ax[db].bar(x2, sums, color=colors, edgecolor='k', width=0.4, alpha=0.5)
  sums = conf_bracken_genus[db].sum(axis=0)
  ax[db].bar(x1, sums, color=colors, edgecolor='k', width=0.4)
  ax[db].semilogy()
  ax[db].set_title(labels[db])
  plt.sca(ax[db])
  if db == 1 or db == 2:
    plt.xticks([])
  else:
    plt.xticks(x3, sample_names, rotation=90)
ax[0].set_ylabel('Number of reads'), ax[1].set_ylabel('Number of reads')
handles = [Patch(facecolor='k', edgecolor='k', label='All genera'), Patch(facecolor='k', edgecolor='k', alpha=0.5, label='Genera present in all')]
ax[2].legend(handles=handles, loc='upper left', bbox_to_anchor=(1,1))
plt.tight_layout()
plt.show()



Genera common to all databases (reads remaining)

fig = plt.figure(figsize=(8,4))
ax = plt.subplot(111)
x1 = [x*5 for x in range(21)]
x2 = [x+1 for x in x1]
x3 = [x+1 for x in x2]
x4 = [x+1 for x in x3]
xplt = [x+0.5 for x in x2]

x = [x3, x1, x2, x4]
al = [0.6, 1, 0.8, 0.4]
for db in range(len(sum_reduced)):
  ax.bar(x[db], sum_reduced[db], color=colors, width=1, edgecolor='k', alpha=al[db])
handles = [Patch(facecolor='k', edgecolor='k', label='Minikraken v1'), Patch(facecolor='k', edgecolor='k', alpha=0.8, label='Minikraken v2'), Patch(facecolor='k', edgecolor='k', alpha=0.6, label='GTDB'), Patch(facecolor='k', edgecolor='k', alpha=0.4, label='RefSeq Complete v93')]
ax.legend(handles=handles, loc='upper left', bbox_to_anchor=(1,1))
plt.xticks(xplt, sample_names, rotation=90)
plt.semilogy()
plt.ylabel('Number of reads')
plt.tight_layout()
plt.show()


Genera stacked bars (absolute abundance)

for db in range(len(conf_bracken_overall)):
  rename_samples = {}
  for sample in list(conf_bracken_overall[db].columns):
    rename_samples[sample] = sample+'_'+names[db]
  this_db = pd.DataFrame(conf_bracken_overall[db].rename(columns=rename_samples))
  if db == 0:
    overall_genus = this_db
  else:
    overall_genus = pd.concat([overall_genus, this_db], axis=1)
overall_genus = pd.DataFrame(overall_genus)
#overall_genus = pd.DataFrame(overall_genus.divide(overall_genus.sum(axis=0)).multiply(100))
overall_genus = overall_genus[overall_genus.max(axis=1) > 25000]
sums = overall_genus.sum(axis=0)
gen_colors = get_cols(overall_genus.shape[0])
print(overall_genus)
fig = plt.figure(figsize=(14,10))
ax1 = plt.subplot(334)
ax = [ax1, plt.subplot(331, sharey=ax1), plt.subplot(332, sharey=ax1), plt.subplot(335, sharey=ax1)]
ax[1].set_ylabel('Number of reads')
ax[0].set_ylabel('Number of reads')
x1 = [x for x in range(21)]
a = 0
col_samples = list(overall_genus.columns)
#line 20
new_db = []
for name in names:
  keeping = []
  for s in list(overall_genus.columns):
    if name in s:
      keeping.append(True)
    else:
      keeping.append(False)
  other_new_db = pd.DataFrame(overall_genus.loc[:, keeping])
  new_db.append(other_new_db)
for db in range(len(new_db)):
  this_genera = list(new_db[db].index.values)
  handles = []
  for g in range(len(this_genera)):
    if g == 0:
      bottom = [0 for c in x1]
    these_values = new_db[db].loc[this_genera[g], :].values
    ax[db].bar(x1, these_values, bottom=bottom, color=gen_colors[g], edgecolor='k', width=1)
    handles.append(Patch(facecolor=gen_colors[g], edgecolor='k', label=this_genera[g]))
    bottom = [bottom[x]+these_values[x] for x in range(len(bottom))]
  plt.sca(ax[db])
  if db == 1 or db == 2:
    plt.xticks([])
  else:
    plt.xticks(x1, sample_names, rotation=90)
  plt.semilogy()
ax[2].legend(handles=handles, loc='upper left', bbox_to_anchor=(1,1), ncol=3)
#plt.tight_layout()
plt.show()


Genera stacked bars (relative abundance)

for db in range(len(conf_bracken_overall)):
  rename_samples = {}
  for sample in list(conf_bracken_overall[db].columns):
    rename_samples[sample] = sample+'_'+names[db]
  this_db = pd.DataFrame(conf_bracken_overall[db].rename(columns=rename_samples))
  if db == 0:
    overall_genus = this_db
  else:
    overall_genus = pd.concat([overall_genus, this_db], axis=1)
overall_genus = pd.DataFrame(overall_genus)
overall_genus = pd.DataFrame(overall_genus.divide(overall_genus.sum(axis=0)).multiply(100))
overall_genus = overall_genus[overall_genus.max(axis=1) > 1]
sums = overall_genus.sum(axis=0)
gen_colors = get_cols(overall_genus.shape[0])
print(overall_genus)
fig = plt.figure(figsize=(14,10))
ax1 = plt.subplot(334)
ax = [ax1, plt.subplot(331, sharey=ax1), plt.subplot(332, sharey=ax1), plt.subplot(335, sharey=ax1)]
ax[1].set_ylabel('Number of reads')
ax[0].set_ylabel('Number of reads')
x1 = [x for x in range(21)]
a = 0
col_samples = list(overall_genus.columns)
#line 20
new_db = []
for name in names:
  keeping = []
  for s in list(overall_genus.columns):
    if name in s:
      keeping.append(True)
    else:
      keeping.append(False)
  other_new_db = pd.DataFrame(overall_genus.loc[:, keeping])
  new_db.append(other_new_db)
for db in range(len(new_db)):
  this_genera = list(new_db[db].index.values)
  handles = []
  for g in range(len(this_genera)):
    if g == 0:
      bottom = [0 for c in x1]
    these_values = new_db[db].loc[this_genera[g], :].values
    ax[db].bar(x1, these_values, bottom=bottom, color=gen_colors[g], edgecolor='k', width=1)
    handles.append(Patch(facecolor=gen_colors[g], edgecolor='k', label=this_genera[g]))
    bottom = [bottom[x]+these_values[x] for x in range(len(bottom))]
  plt.sca(ax[db])
  if db == 1 or db == 2:
    plt.xticks([])
  else:
    plt.xticks(x1, sample_names, rotation=90)
ax[2].legend(handles=handles, loc='upper left', bbox_to_anchor=(1,1), ncol=3)
#plt.tight_layout()
plt.show()


Presence/absence of genera

Here, each sample is plotted using each of the four different databases, from top to bottom: GTDB, Minikraken v1, Minikraken v2 and RefSeq Complete v93. Each genera is plotted as a column, with black indicating that it is present and white indicating that it is absent.

Note that for some sample sets, e.g. saliva, there is far more consensus within samples than in other sample sets, e.g. blood.

count = 0
plt.figure(figsize=(6,6))
x, y, b0, b1, b2, b3 = [], [], [], [], [], []
for s in full_name_dict:
  print(s)
  ax = plt.subplot2grid((21, 10), (count,2), colspan=8)
  count += 1
  nums, pres_colors = [], []
  for db in conf_bracken_overall:
    nums.append(list(db.loc[:, s].values))
    pres_colors.append([])
  for a in range(len(nums[0])):
    if nums[0][a] > 0: pres_colors[0].append('k')
    else: pres_colors[0].append('w')
    if nums[1][a] > 0: pres_colors[1].append('k')
    else: pres_colors[1].append('w')
    if nums[2][a] > 0: pres_colors[2].append('k')
    else: pres_colors[2].append('w')
    if nums[3][a] > 0: pres_colors[3].append('k')
    else: pres_colors[3].append('w')
    if s == 'SRR8595488':
      x.append(a), y.append(1), b0.append(0), b1.append(1), b2.append(2), b3.append(3)
  ax.bar(x, y, bottom=b3, color=pres_colors[0], width=1)
  ax.bar(x, y, bottom=b2, color=pres_colors[1], width=1)
  ax.bar(x, y, bottom=b1, color=pres_colors[2], width=1)
  ax.bar(x, y, bottom=b0, color=pres_colors[3], width=1)
  plt.sca(ax)
  plt.xticks([]), plt.yticks([2], [full_name_dict[s]])
  plt.xlim([x[0]-0.5, x[-1]+0.5]), plt.ylim([0, 4])
plt.subplots_adjust(hspace=0.05)
plt.show()


Presence/absence of genera (in all 4 databases)

Now I am combining the data from the 4 databases, and only plotting a genus in a sample if it is present in all 4 databases. Again, black indicates presence while white indicates absence.

count = 0
plt.figure(figsize=(6,6))
x, y = [], []
for s in full_name_dict:
  ax = plt.subplot2grid((21, 10), (count,2), colspan=8)
  count += 1
  nums, pres_colors = [], []
  for db in conf_bracken_overall:
    nums.append(list(db.loc[:, s].values))
  for a in range(len(nums[0])):
    if nums[0][a] > 0 and nums[1][a] > 0 and nums[2][a] > 0 and nums[3][a] > 0: pres_colors.append('k')
    else: pres_colors.append('w')
    if s == 'SRR8595488':
      x.append(a), y.append(1)
  ax.bar(x, y, color=pres_colors, width=1)
  plt.sca(ax)
  plt.xticks([]), plt.yticks([0.5], [full_name_dict[s]])
  plt.xlim([x[0]-0.5, x[-1]+0.5]), plt.ylim([0, 1])
plt.subplots_adjust(hspace=0.05)
plt.show()


Presence/absence of genera (in 3 of 4 databases)

Now I am combining the data from the 4 databases, and only plotting a genus in a sample if it is present in 3 of 4 databases. Again, black indicates presence while white indicates absence.

count = 0
plt.figure(figsize=(6,6))
x, y = [], []
for s in full_name_dict:
  ax = plt.subplot2grid((21, 10), (count,2), colspan=8)
  count += 1
  nums, pres_colors = [], []
  for db in conf_bracken_overall:
    nums.append(list(db.loc[:, s].values))
  for a in range(len(nums[0])):
    this_num = [nums[0][a], nums[1][a], nums[2][a], nums[3][a]]
    if sum(this_num) >= 3: pres_colors.append('k')
    else: pres_colors.append('w')
    if s == 'SRR8595488':
      x.append(a), y.append(1)
  ax.bar(x, y, color=pres_colors, width=1)
  plt.sca(ax)
  plt.xticks([]), plt.yticks([0.5], [full_name_dict[s]])
  plt.xlim([x[0]-0.5, x[-1]+0.5]), plt.ylim([0, 1])
plt.subplots_adjust(hspace=0.05)
plt.show()


Presence/absence of genera (in 2 of 4 databases)

Now I am combining the data from the 4 databases, and only plotting a genus in a sample if it is present in 2 of 4 databases. Again, black indicates presence while white indicates absence.

count = 0
plt.figure(figsize=(6,6))
x, y = [], []
for s in full_name_dict:
  ax = plt.subplot2grid((21, 10), (count,2), colspan=8)
  count += 1
  nums, pres_colors = [], []
  for db in conf_bracken_overall:
    nums.append(list(db.loc[:, s].values))
  for a in range(len(nums[0])):
    this_num = [nums[0][a], nums[1][a], nums[2][a], nums[3][a]]
    if sum(this_num) >= 2: pres_colors.append('k')
    else: pres_colors.append('w')
    if s == 'SRR8595488':
      x.append(a), y.append(1)
  ax.bar(x, y, color=pres_colors, width=1)
  plt.sca(ax)
  plt.xticks([]), plt.yticks([0.5], [full_name_dict[s]])
  plt.xlim([x[0]-0.5, x[-1]+0.5]), plt.ylim([0, 1])
plt.subplots_adjust(hspace=0.05)
plt.show()


Summary

These plots show that almost all genera are present in saliva samples, while the other body sites just have subsets of these. There is also the largest amount of consensus between saliva samples, and the use of different databases to determine presence/absence. I will now include the genera that are present in 3 of 4 databases for further analyses.


Differences between body sites based on presence/absence

These plots are looking at the differences between body sites based on presence/absence (using the taxa that are present in at least 3 of 4 databases). I have also removed the HuRef blood before carrying out any comparisons.

new_db = []
for s in full_name_dict:
  if s == 'SRR8595488': continue
  nums, presence = [], [s]
  for db in conf_bracken_overall:
    nums.append(list(db.loc[:, s].values))
  for a in range(len(nums[0])):
    this_num = [nums[0][a], nums[1][a], nums[2][a], nums[3][a]]
    if sum(this_num) >= 3: presence.append(1)
    else: presence.append(0)
  new_db.append(presence)
new_db = pd.DataFrame(new_db, columns=['Samples']+list(conf_bracken_overall[0].index.values))
new_db.set_index('Samples', inplace=True)
new_db = new_db.transpose()
new_db.to_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2/db_genera_in_common/combined_db_3of4.csv')

list_comparisons = [['Blood', 'Saliva'], ['Blood', 'Buccal'], ['Saliva', 'Buccal'], ['Saliva', 'Saliva_euk'], ['Buccal', 'Buccal_euk'], ['Blood', 'Saliva_euk'], ['Blood', 'Buccal_euk'], ['Saliva_euk', 'Buccal_euk']]
samples = list(new_db.columns)
comparisons, metadata, comp_len = [], [], []
for a in range(len(list_comparisons)):
    keeping = []
    this_md = []
    for b in range(len(samples)):
        if site_dict[samples[b]] in list_comparisons[a]:
            keeping.append(True)
            this_md.append([samples[b], site_dict[samples[b]]])
        else:
            keeping.append(False)
    this_comp = new_db.loc[:, keeping]
    comparisons.append(this_comp)
    this_md = pd.DataFrame(this_md, columns=['Samples', 'Groups'])
    #this_md.set_index('Samples', inplace=True)
    metadata.append(this_md)
    comp_len.append(this_comp.shape[0])
    
c1, c2, c3, c4, c5, c6, c7, c8 = comparisons[0], comparisons[1], comparisons[2], comparisons[3], comparisons[4], comparisons[5], comparisons[6], comparisons[7]
md1, md2, md3, md4, md5, md6, md7, md8 = metadata[0], metadata[1], metadata[2], metadata[3], metadata[4], metadata[5], metadata[6], metadata[7]
ft = list(py$c1, py$c2, py$c3, py$c4, py$c5, py$c6, py$c7, py$c8)
md = list(py$md1, py$md2, py$md3, py$md4, py$md5, py$md6, py$md7, py$md8)
all_ancom = get_ancom(ft, md)
ancom = sort_ancom_results(r.all_ancom)
ancom_df = []
comparison_names = [r'Blood vs saliva', r'Blood vs buccal', r'Saliva vs buccal', r'Saliva vs saliva_euk', r'Buccal vs buccal_euk', r'Blood vs saliva_euk', r'Blood vs buccal_euk', r'Saliva_euk vs buccal_euk']
for c in range(len(comparison_names)):
    this_row = [comparison_names[c], comp_len[c], len(ancom[3][c]), len(ancom[2][c]), len(ancom[1][c]), len(ancom[0][c])]
    ancom_df.append(this_row)
ancom_df = pd.DataFrame(ancom_df, columns=['Comparison', 'Shared genera', 'ANCOM 0.6', 'ANCOM 0.7', 'ANCOM 0.8', 'ANCOM 0.9'])


Tables of presence/absence

This is the table of presence/absence used for the ANCOM tests.

pres_abs = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kraken2/db_genera_in_common/combined_db_3of4.csv', header=0, index_col=0)
pres_abs.rename(columns=full_name_dict, inplace=True)
py$pres_abs %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
PGPC-0002 Blood PGPC-0005 Blood PGPC-0006 Blood PGPC-0050 Blood PGPC-0002 Saliva PGPC-0005 Saliva PGPC-0006 Saliva PGPC-0050 Saliva PGPC-0002 Buccal PGPC-0005 Buccal PGPC-0006 Buccal PGPC-0050 Buccal PGPC-0002 Saliva_euk PGPC-0005 Saliva_euk PGPC-0006 Saliva_euk PGPC-0050 Saliva_euk PGPC-0002 Buccal_euk PGPC-0005 Buccal_euk PGPC-0006 Buccal_euk PGPC-0050 Buccal_euk
Acaryochloris 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Acetoanaerobium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Acetobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Acetobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Acetohalobium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Acetomicrobium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Acholeplasma 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Achromobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Acidaminococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Acidianus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Acidiferrobacter 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1
Acidihalobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Acidilobus 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
Acidimicrobium 0 1 0 0 1 1 1 1 1 1 1 0 1 1 1 1 0 0 0 0
Acidiphilium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Acidipropionibacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Acidisphaera 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Acidithiobacillus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Acidobacterium 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1
Acidothermus 1 0 1 0 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 0
Aciduliprofundum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Acinetobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Actinoalloteichus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Actinobacillus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Actinomyces 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Actinoplanes 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Actinopolymorpha 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Actinopolyspora 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Actinosynnema 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Actinotignum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Adlercreutzia 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Advenella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Aequorivita 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Aeribacillus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Aerococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Aeromicrobium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Aeromonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Afipia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Agarilytica 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Agarivorans 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Aggregatibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Agrobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Agrococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Agromyces 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Ahniella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Akkermansia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Alcaligenes 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Alcanivorax 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Algibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Algoriphagus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Alicycliphilus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Alicyclobacillus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Aliivibrio 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Alistipes 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Alkalilimnicola 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1
Alkaliphilus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Alkalitalea 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Allochromatium 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Allofrancisella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Allokutzneria 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Altererythrobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Alteromonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Aminobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Aminobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Aminomonas 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1
Ammonifex 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0
Amphibacillus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Amycolatopsis 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Anabaena 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Anabaenopsis 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Anaerococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Anaerolinea 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
Anaeromyxobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Anaerostipes 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Anaerotignum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Anaplasma 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Anderseniella 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Aneurinibacillus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Anoxybacillus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Antarctobacter 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1
Aquabacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Aquaspirillum 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
Aquifex 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Aquiflexum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Aquimarina 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Aquitalea 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Arachidicoccus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Arcanobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Archaeoglobus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Arcobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Arcticibacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Arenibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Aromatoleum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Arsenicicoccus 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Arthrobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Arthrospira 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Asaia 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Asticcacaulis 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Atopobium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Aurantimicrobium 1 0 0 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 0
Auraticoccus 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Aureimonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Aureitalea 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Auricoccus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Auritidibacter 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1
Austwickia 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1
Azorhizobium 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Azospira 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Azospirillum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Azotobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Bacillus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Bacterioplanes 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1
Bacteriovorax 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Bacteroides 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Barnesiella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Bartonella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Basilea 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Bdellovibrio 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Beggiatoa 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Beijerinckia 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Belliella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Bernardetia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Beutenbergia 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Bibersteinia 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Bifidobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Blastochloris 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
Blastococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Blastomonas 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Blattabacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Blautia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Blochmannia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Bordetella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Borrelia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Borreliella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Bosea 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Brachybacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Brachyspira 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Bradymonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Bradyrhizobium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Brenneria 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Breoghania 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Brevefilum 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Brevibacillus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Brevibacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Brevundimonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Brochothrix 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Brucella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Buchnera 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Burkholderia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Butyrivibrio 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Caldanaerobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Caldicellulosiruptor 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Caldilinea 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Caldimicrobium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Caldisericum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Caldisphaera 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Calditerrivibrio 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Caldithrix 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Caldivirga 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Calothrix 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Campylobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Capnocytophaga 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Carboxydocella 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Carboxydothermus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Cardinium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Carnobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Castellaniella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Catenovulum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Catenulispora 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Caulobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Cedecea 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Celeribacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Cellulomonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Cellulophaga 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Cellulosilyticum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Cellulosimicrobium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Cellvibrio 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Chamaesiphon 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Chania 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Chelativorans 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Chelatococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Chitinophaga 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Chlamydia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Chloracidobacterium 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1
Chlorobaculum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Chlorobium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Chloroflexus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Chloroherpeton 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Christensenella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Chromobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Chromohalobacter 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Chroococcidiopsis 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Chryseobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Chryseolinea 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Chthonomonas 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Citrobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Citromicrobium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Clavibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Cloacibacillus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Clostridioides 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Clostridium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Cnuibacter 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
Cobetia 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Cohaesibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Collimonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Collinsella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Colwellia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Comamonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Conexibacter 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Confluentimicrobium 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Congregibacter 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Coprothermobacter 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Coraliomargarita 0 0 1 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1
Corallococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Coriobacterium 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
Corynebacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Coxiella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Crenobacter 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1
Crinalium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Croceibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Croceicoccus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Cronobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Cryobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Cryptobacterium 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
Cuniculiplasma 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Cupriavidus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Curtobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Curvibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Cutibacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Cyanobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Cyanobium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Cyanothece 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Cyclobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Cycloclasticus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Cylindrospermum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Cystobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Cytophaga 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Dechloromonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Deferribacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Defluviimonas 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Defluviitoga 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Dehalobacter 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Dehalobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Dehalococcoides 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Dehalogenimonas 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Deinococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Delftia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Denitrobacterium 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 1
Denitrovibrio 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Dermabacter 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1
Dermacoccus 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Dermatophilus 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Desulfarculus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Desulfatibacillum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Desulfitobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Desulfobacca 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Desulfobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Desulfobacula 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Desulfobulbus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Desulfocapsa 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Desulfococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Desulfohalobium 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
Desulfomicrobium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Desulfomonile 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Desulfosporosinus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Desulfotalea 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
Desulfotomaculum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Desulfovibrio 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Desulfurella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Desulfurispirillum 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1
Desulfurivibrio 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Desulfurobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Desulfurococcus 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Desulfuromonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Devosia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Devriesea 0 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1
Dialister 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Diaphorobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Dichelobacter 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Dickeya 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Dictyoglomus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Dietzia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Dinoroseobacter 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
Dokdonella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Dokdonia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Dolichospermum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Draconibacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Dyadobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Dyella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Echinicola 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Ectothiorhodospira 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Edwardsiella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Eggerthella 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Ehrlichia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Eikenella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Elizabethkingia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Elusimicrobium 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1
Endomicrobium 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Endozoicomonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Ensifer 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Enterobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Enterococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Entomoplasma 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Epibacterium 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Ereboglobus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Erwinia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Erysipelothrix 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Erythrobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Escherichia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Ethanoligenens 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Eubacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Euzebya 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Exiguobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Ezakiella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Faecalibacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Faecalibaculum 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
Faecalitalea 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Fastidiosipila 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Fermentimonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Ferrimonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Ferriphaselus 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Ferroglobus 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Ferroplasma 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Fervidobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Fibrella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Fibrobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Fictibacillus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Filifactor 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Filimonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Fimbriimonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Finegoldia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Fischerella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Flagellimonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Flammeovirga 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Flavisolibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Flavivirga 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Flavobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Flavonifractor 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Flexistipes 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Fluviicola 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Formosa 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Francisella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Frankia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Frateuria 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Friedmanniella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Frischella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Frondihabitans 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Fuerstia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
Fusobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Gallaecimonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Gallibacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Gallionella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Geitlerinema 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Gemella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Geminocystis 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Gemmatimonas 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Gemmatirosa 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Gemmobacter 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Geoalkalibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Geobacillus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Geobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Geodermatophilus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Geoglobus 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Geosporobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Gibbsiella 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Gilliamella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Gillisia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Gilvibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Glaciecola 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Glaesserella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Gloeobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Gloeocapsa 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Gloeomargarita 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1
Gluconacetobacter 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0
Gluconobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Glutamicibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Gordonia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Gordonibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Gottschalkia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Gramella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Granulibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Granulicella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Granulosicoccus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Grimontia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Gynuella 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Haematospirillum 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
Haemophilus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Hafnia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Hahella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Halalkalicoccus 0 1 1 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 0
Halanaeroarchaeum 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0
Halanaerobium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Haliangium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Halioglobus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Haliscomenobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Haloarcula 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Halobacillus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Halobacteriovorax 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Halobacterium 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Halobacteroides 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Halobiforma 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Halocynthiibacter 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Halodesulfurarchaeum 0 0 0 0 1 1 1 1 1 0 1 1 0 1 1 1 0 0 0 0
Haloferax 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Halogeometricum 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Halohasta 0 1 0 0 1 1 1 1 1 1 1 1 0 1 1 1 1 0 1 0
Halomicrobium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
Halomonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Halopenitus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Halopiger 1 0 0 0 1 1 1 1 1 1 1 0 0 1 1 1 0 1 1 1
Haloplanus 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Haloquadratum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Halorhabdus 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Halorhodospira 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Halorientalis 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Halorubrum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Halostagnicola 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
Halotalea 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Haloterrigena 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Halothece 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Halothermothrix 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Halothiobacillus 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Hartmannibacter 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Helicobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Heliobacterium 0 0 0 0 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 1
Herbaspirillum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Herbinix 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Herminiimonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Hippea 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Hirschia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Histophilus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Hoeflea 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Hoyosella 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Humibacter 0 0 0 0 1 1 1 1 1 1 1 0 1 1 1 1 0 1 1 1
Hydrogenobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Hydrogenobaculum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Hydrogenophaga 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Hydrogenovibrio 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Hymenobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Hyperthermus 0 0 0 0 0 1 1 1 0 0 1 1 1 1 1 1 0 1 0 0
Hyphomicrobium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Hyphomonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Ichthyobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Idiomarina 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Ignavibacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Ignicoccus 1 0 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 1
Ilyobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Immundisolibacter 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0
Intestinimonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Intrasporangium 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Isoptericola 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Isosphaera 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1
Janibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Jannaschia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Janthinobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Jatrophihabitans 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
Jeongeupia 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1
Jeotgalibaca 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Jeotgalibacillus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Jeotgalicoccus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Jiangella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Jonesia 1 0 1 0 1 1 1 1 1 1 1 1 0 1 1 1 0 1 0 1
Jonquetella 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1
Kangiella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Ketobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Ketogulonicigenium 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Kibdelosporangium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Kineococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Kingella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Kiritimatiella 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
Kitasatospora 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Klebsiella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Kluyvera 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Kocuria 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Komagataeibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Kordia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Kosakonia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Kosmotoga 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Kozakia 0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1
Kribbella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Kurthia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Kushneria 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Kutzneria 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Kyrpidia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Kytococcus 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Labrenzia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Laceyella 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Lachnoanaerobaculum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Lachnoclostridium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Lacimicrobium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Lacinutrix 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Lactobacillus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Lactococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Lacunisphaera 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Laribacter 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Lawsonella 0 0 0 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1
Lawsonia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Leadbetterella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Leclercia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Legionella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Leifsonia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Leisingera 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Lelliottia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Leminorella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Lentibacillus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Lentzea 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Leptolyngbya 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Leptospira 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Leptospirillum 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Leptothrix 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1
Leptotrichia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Leuconostoc 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Libanicoccus 0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Liberibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Limnobaculum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Limnochorda 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Limnohabitans 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Listeria 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Lonsdalea 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Luteibacter 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Luteimonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Luteipulveratus 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Luteitalea 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Lutibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Lysinibacillus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Lysobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Macrococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Mageeibacillus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Magnetococcus 0 1 1 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1
Magnetospira 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1
Magnetospirillum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Mahella 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Mannheimia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Maribacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Maricaulis 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Marichromatium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Marinilactibacillus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Marinithermus 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Marinitoga 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Marinobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Marinobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Marinomonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Marinovum 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
Mariprofundus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Maritalea 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1
Marivirga 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Marivivens 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Marmoricola 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Martelella 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Massilia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Megamonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Megasphaera 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Meiothermus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Melaminivora 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Melioribacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Melissococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Mesoplasma 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Mesorhizobium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Mesotoga 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Metallosphaera 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methanobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methanobrevibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methanocaldococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methanocella 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methanococcoides 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methanococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methanocorpusculum 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methanoculleus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methanofollis 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methanohalobium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methanohalophilus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methanolacinia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methanolinea 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
Methanolobus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methanomethylovorans 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methanoplanus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methanopyrus 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methanoregula 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methanosalsum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methanosarcina 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methanosphaera 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methanosphaerula 0 0 0 0 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 1
Methanospirillum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methanothermobacter 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methanothermococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methanothermus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methanothrix 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methanotorris 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methylacidiphilum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methylibium 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methylobacillus 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methylobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methylocaldum 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methyloceanibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methylocella 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methylococcus 0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
Methylocystis 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methylomicrobium 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methylomonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methylophaga 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methylophilus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methylosinus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methylotenera 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methyloversatilis 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Methylovorus 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
Methylovulum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Micavibrio 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Microbacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Microbulbifer 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Microcella 0 0 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 1 1
Micrococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Microcoleus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Microcystis 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Microlunatus 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Micromonospora 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Micropruina 0 0 0 0 1 1 1 1 1 1 1 1 0 1 1 1 0 1 0 1
Microterricola 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Microvirga 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Microvirgula 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1
Mitsuaria 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Mobiluncus 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
Modestobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Mogibacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Monoglobus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Moorea 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Moorella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Moraxella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Morganella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Moritella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Mucilaginibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Mucinivorans 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Murdochiella 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1
Muribaculum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Muricauda 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Mycetocola 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Mycobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Mycobacteroides 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Mycolicibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Mycolicibacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Mycoplasma 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Myroides 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Myxococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Nakamurella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Natranaerobius 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Natrialba 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Natrinema 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Natronobacterium 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 0
Natronococcus 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Natronolimnobius 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0
Natronomonas 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Nautilia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Ndongobacter 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Negativicoccus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Neisseria 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Neoasaia 0 0 0 0 1 1 1 1 1 1 1 0 0 1 1 1 0 0 1 1
Neomicrococcus 0 0 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1
Neorhizobium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Neorickettsia 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Niabella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Niastella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Nitratifractor 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
Nitratireductor 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Nitratiruptor 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Nitrobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Nitrosococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Nitrosomonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Nitrososphaera 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Nitrosospira 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Nitrospira 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Nitrospirillum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Niveispirillum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Nocardia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Nocardioides 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Nocardiopsis 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Nodularia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Nonlabens 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Nonomuraea 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Nostoc 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Novibacillus 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Novosphingobium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Oblitimonas 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Oceanicoccus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Oceanimonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Oceanisphaera 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Oceanithermus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Oceanobacillus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Ochrobactrum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Octadecabacter 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Odoribacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Oenococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Oleiphilus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Oleispira 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Oligella 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Olleya 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Olsenella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Opitutus 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Orientia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Ornithinimicrobium 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Ornithobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Orrella 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Oscillatoria 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Oscillibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Ottowia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Owenweeksia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Oxalobacter 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1
Paenalcaligenes 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Paenarthrobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Paenibacillus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Paeniclostridium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Paenisporosarcina 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pajaroellobacter 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
Palaeococcus 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 1
Paludibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Paludisphaera 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pandoraea 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pannonibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pantoea 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Parabacteroides 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Paraburkholderia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Parachlamydia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Paracoccus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Parageobacillus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Paraglaciecola 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Paraliobacillus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Paraoerskovia 0 1 0 0 1 1 1 1 1 1 1 1 0 1 1 1 1 0 1 1
Paraphotobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pararhodospirillum 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1
Parascardovia 1 0 0 0 1 1 1 1 1 1 1 1 0 1 0 1 0 0 1 0
Parvibaculum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Parvimonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Parvularcula 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pasteurella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Paucibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pectobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pediococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pedobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pelagibacterium 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pelosinus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Peptoclostridium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Peptoniphilus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Persephonella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Petrimonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Petrotoga 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Phaeobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Phenylobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Phoenicibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Photobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Photorhabdus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Phreatobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Phycicoccus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Phycisphaera 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Phyllobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Picrophilus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pirellula 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Piscirickettsia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Planctopirus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Planktothrix 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Planococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Plantibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Plesiomonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pleurocapsa 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pluralibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Polaribacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Polaromonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Polymorphum 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Polynucleobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pontibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pontimonas 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 1
Porphyrobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Porphyromonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pragia 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Prauserella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Prevotella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Prochlorococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Propionibacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Propionimicrobium 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Prosthecochloris 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Proteiniphilum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Proteus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Providencia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pseudanabaena 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pseudarcicella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pseudarthrobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pseudoalteromonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pseudodesulfovibrio 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pseudogulbenkiania 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pseudohongiella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pseudolabrys 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pseudomonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pseudonocardia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pseudopedobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pseudopropionibacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pseudorhodoplanes 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pseudothermotoga 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
Pseudovibrio 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pseudoxanthomonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Psychrobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Psychroflexus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Psychromonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pusillimonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pyrobaculum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pyrococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pyrodictium 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Pyrolobus 0 0 1 1 1 1 1 1 0 1 1 1 0 1 1 1 0 0 1 1
Rahnella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Ralstonia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Ramlibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Raoultella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Raphidiopsis 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Rathayibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Reinekea 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Renibacterium 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1
Rhizobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Rhizobium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Rhodanobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Rhodobaca 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Rhodobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Rhodococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Rhodoferax 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Rhodoluna 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1
Rhodomicrobium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Rhodopirellula 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Rhodoplanes 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Rhodopseudomonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Rhodospirillum 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Rhodothermus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Rhodovulum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Rickettsia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Riemerella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Rivularia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Robiginitalea 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Roseateles 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Roseburia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Roseibacterium 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Roseiflexus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Roseobacter 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Roseomonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Roseovarius 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Rothia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Rubinisphaera 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0
Rubrivivax 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Rubrobacter 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
Ruegeria 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Rufibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Ruminiclostridium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Ruminococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Rummeliibacillus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Runella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Saccharomonospora 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Saccharophagus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Saccharopolyspora 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Saccharospirillum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Saccharothrix 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Sagittula 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Salegentibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Salimicrobium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Salinarchaeum 0 0 0 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 0
Salinibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Salinibacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Salinicoccus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Salinicola 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Salinigranum 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
Salinimonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Salinisphaera 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Salinispira 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Salinivibrio 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Salinivirga 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Salipiger 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Salmonella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Sandaracinus 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Sanguibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Saprospira 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Scardovia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
Scytonema 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Sebaldella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Sedimenticola 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Sediminicola 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Sediminispirochaeta 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Segniliparus 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
Selenomonas 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Seonamhaeicola 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Serinicoccus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Serratia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Shewanella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Shimwellia 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Shinella 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Siansivirga 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Sideroxydans 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1
Simiduia 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1
Simkania 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Simonsiella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Sinomonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Sinorhizobium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Slackia 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0
Sneathia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Snodgrassella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Sodalis 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Solibacillus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Solimonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Solitalea 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Sphaerobacter 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1
Sphaerochaeta 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Sphaerospermopsis 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Sphingobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Sphingobium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Sphingomonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Sphingopyxis 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Sphingorhabdus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Spiribacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Spiroplasma 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Spirosoma 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Spongiibacter 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Sporosarcina 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Stackebrandtia 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Stanieria 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Staphylococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Staphylothermus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Stappia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Starkeya 0 0 0 0 1 1 1 1 1 1 1 0 1 1 0 1 1 1 1 1
Stenotrophomonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Steroidobacter 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1
Stigmatella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Streptacidiphilus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Streptobacillus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Streptococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Streptomyces 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Streptosporangium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Sulfitobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Sulfolobus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Sulfuricaulis 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
Sulfuricella 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1
Sulfuricurvum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Sulfuriferula 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
Sulfurifustis 0 0 1 0 1 1 1 1 1 1 1 0 0 1 1 1 0 0 1 1
Sulfurihydrogenibium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Sulfurimonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Sulfurisphaera 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Sulfuritalea 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Sulfurospirillum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Sulfurovum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Sutterella 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Symbiobacterium 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Synechococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Synechocystis 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Syntrophobotulus 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Syntrophomonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Syntrophothermus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
Syntrophus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Tannerella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Tateyamaria 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Tatlockia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Tatumella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Taylorella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Tenacibaculum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Tepidanaerobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Teredinibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Terribacillus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Terriglobus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Tessaracoccus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Tetragenococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thalassococcus 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thalassolituus 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thalassospira 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thalassotalea 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thauera 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thermacetogenium 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thermaerobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thermanaeromonas 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thermanaerovibrio 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thermincola 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thermoanaerobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thermoanaerobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thermobacillus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thermobaculum 1 0 1 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1
Thermobifida 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thermobispora 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thermococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thermocrinis 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thermodesulfatator 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thermodesulfobacterium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thermodesulfobium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thermodesulfovibrio 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thermofilum 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thermogutta 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0
Thermomicrobium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thermomonospora 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thermoplasma 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thermoproteus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thermosediminibacter 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
Thermosipho 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thermosulfidibacter 1 0 1 0 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 1
Thermosynechococcus 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1
Thermotoga 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thermovibrio 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0
Thermovirga 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1
Thermus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thioalkalivibrio 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thiobacillus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thioclava 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thiocystis 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
Thioflavicoccus 1 0 0 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1
Thiohalobacter 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
Thiolapillus 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
Thiomicrospira 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thiomonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Thioploca 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Tistrella 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Tolumonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Treponema 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Trichodesmium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Trichormus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Tropheryma 0 0 0 0 1 1 1 1 1 0 1 1 1 1 1 1 1 0 1 0
Truepera 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
Trueperella 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Tsukamurella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Turicibacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Turneriella 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
Ureaplasma 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Vagococcus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Variibacter 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1
Variovorax 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Veillonella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Verminephrobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Verrucomicrobium 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Vibrio 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Virgibacillus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Vitreoscilla 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Vogesella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Vulcanisaeta 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Vulgatibacter 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
Waddlia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Weeksella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Weissella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Wenyingzhuangia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Wenzhouxiangella 0 0 0 0 1 1 1 1 1 1 1 1 0 1 1 1 0 0 1 0
Wigglesworthia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Winogradskyella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Woeseia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Wolbachia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Wolinella 0 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 0 1 0
Xanthobacter 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Xanthomonas 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Xenorhabdus 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Xylanimonas 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Xylella 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Yersinia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Yoonia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Zhihengliuella 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1
Zhongshania 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Zobellella 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Zobellia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Zunongwangia 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Zymobacter 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1
Zymomonas 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1

ANCOM summary

This table shows the number of differentially abundant taxa between body sites as determined by ANCOM tests with different significance thresholds.

py$ancom_df %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Comparison Shared genera ANCOM 0.6 ANCOM 0.7 ANCOM 0.8 ANCOM 0.9
Blood vs saliva 1054 74 74 0 0
Blood vs buccal 1054 66 66 0 0
Saliva vs buccal 1054 0 0 0 0
Saliva vs saliva_euk 1054 0 0 0 0
Buccal vs buccal_euk 1054 6 6 6 0
Blood vs saliva_euk 1054 57 57 0 0
Blood vs buccal_euk 1054 28 28 0 0
Saliva_euk vs buccal_euk 1054 5 5 5 0


ANCOM heatmap

This is plotted using the ANCOM 0.7 results. I have removed all taxa that are present in all body sites. Yellow means present in all samples for that body site while purple means absent in all samples for that body site.

Note that the differences that we saw before with Klebsiella, Streptococcus and Prevotella are not present here as all of these are present in all samples.

new_new_db = new_db.rename(columns=site_dict)
keeping = []
taxa_new = list(new_new_db.index.values)
for tax in taxa_new:
  this_row = new_new_db.loc[tax, :].values
  if len(list(set(this_row))) == 1: keeping.append(False)
  else: keeping.append(True)
new_new_db = new_new_db.loc[keeping, :]
plot_heatmap(new_new_db, ancom[2])


Functional profiling

Function is profiled using:
1. HUMAnN2
- Uniref90 database
- Uniref50 database
2. HUMAnN3
- Uniref90 and Uniref50

HUMAnN unaligned reads

We can see here that it’s a bit variable as to whether HUMAnN3 classifies more or less reads than either of the databases with HUMAnN2 (HUMAnN3 uses both Uniref50 and Uniref90). Also, with HUMAnN3, all of the samples with no taxonomic classifications with MetaPhlAn3 have 48-49% unaligned, which is much less than any of the other samples.

plt.figure(figsize=(8,6))
ax1 = plt.subplot(111)

x1 = [x for x in range(21)]
x2 = [x+0.3 for x in range(21)]
x3 = [x+0.6 for x in range(21)]
#x = [x+0.2 for x in range(21)]

ax1.bar(x1, reads.loc[:, 'unaligned_after_translation_uniref_90'].values, color=colors, edgecolor='k', width=0.3)
ax1.bar(x2, reads.loc[:, 'unaligned_after_translation_uniref_50'].values, color=colors, edgecolor='k', width=0.3, alpha=0.7)
ax1.bar(x3, reads.loc[:, 'unaligned_after_translation_humann3'].values, color=colors, edgecolor='k', width=0.3, alpha=0.4)
plt.xticks(x2, sample_names, rotation=90)
plt.ylabel('Unaligned reads after translation (%)')
plt.xlim([-0.5,21])
handles = [Patch(facecolor='k', edgecolor='k', label='HUMAnN2 Uniref90'), Patch(facecolor='k', edgecolor='k', alpha=0.7, label='HUMAnN2 Uniref50'), Patch(facecolor='k', edgecolor='k', alpha=0.4, label='HUMAnN3')]
ax1.legend(handles=handles, loc='upper left', bbox_to_anchor=(1,1.02))

plt.tight_layout()
plt.show()


HUMAnN richness

After running HUMAnN2 and HUMAnN3, the pathabundance, pathcoverage and genefamilies tables are joined, and the pathabundance is renormalised (relative abundance calculated) and split to tables that are stratified by species or not. Here we use the unstratified pathabundance file (the MetaPhlAn2/MetaPhlAn3 species results don’t seem to have been particularly accurate). The genefamilies file is stratified by default so we take only the overall values for the gene family (removing all species results) and re-calculate these values as relative abundances.


pathways_90 = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann/processing/humann2_final_out_90/humann2_pathabundance_relab_unstratified.tsv', header=0, index_col=0, sep='\t')
pathways_50 = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann/processing/humann2_final_out_50/humann2_pathabundance_relab_unstratified.tsv', header=0, index_col=0, sep='\t')
pathways_h3 = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann3/humann3_final_out/humann3_pathabundance_relab_unstratified.tsv', header=0, index_col=0, sep='\t')

def get_richness(df):
  snames = list(df.columns)
  num_genes = []
  for sn in snames:
    one_sample = pd.DataFrame(df.loc[:, sn])
    one_sample = one_sample[one_sample.max(axis=1) > 0]
    num_genes.append(one_sample.shape[0])
  return snames, num_genes
  
def genes_filter(genes):
  genes.drop('UNMAPPED', axis=0, inplace=True)
  gene_names = list(genes.index.values)
  keeping = []
  for gn in gene_names:
      new_gn = gn.split('|')[0]
      if gn == new_gn: keeping.append(True)
      else: keeping.append(False)
  genes_abs = genes.loc[keeping, :]
  genes = genes_abs.divide(genes_abs.sum(axis=0)).multiply(100)
  return genes_abs, genes

genes_90_not_cpm = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann/processing/humann2_final_out_90/humann2_genefamilies.tsv', header=0, index_col=0, sep='\t')
genes_90 = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann/processing/humann2_final_out_90/humann2_genefamilies_cpm.tsv', header=0, index_col=0, sep='\t')
genes_90_ko = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann/processing/humann2_final_out_90/humann2_genefamilies_cpm_ko_90.tsv', header=0, index_col=0, sep='\t')
genes_90_not_cpm = genes_filter(genes_90_not_cpm)[1]
genes_90 = genes_filter(genes_90)[1]
genes_90_ko = genes_filter(genes_90_ko)[1]

snames_genes_90, genes_90_richness = get_richness(genes_90)
snames_genes_90_ko, genes_90_richness_ko = get_richness(genes_90_ko)
snames_pways_90, pways_90_richness = get_richness(pathways_90)


genes_50_not_cpm = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann/processing/humann2_final_out_50/humann2_genefamilies.tsv', header=0, index_col=0, sep='\t')
genes_50 = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann/processing/humann2_final_out_50/humann2_genefamilies_cpm.tsv', header=0, index_col=0, sep='\t')
genes_50_ko = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann/processing/humann2_final_out_50/humann2_genefamilies_cpm_ko_50.tsv', header=0, index_col=0, sep='\t')
genes_50_not_cpm = genes_filter(genes_50_not_cpm)[1]
genes_50 = genes_filter(genes_50)[1]
genes_50_ko = genes_filter(genes_50_ko)[1]

snames_genes_50, genes_50_richness = get_richness(genes_50)
snames_genes_50_ko, genes_50_richness_ko = get_richness(genes_50_ko)
snames_pways_50, pways_50_richness = get_richness(pathways_50)

genes_h3_not_cpm = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann3/humann3_final_out/humann3_genefamilies.tsv', header=0, index_col=0, sep='\t')
genes_h3 = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann3/humann3_final_out/humann3_genefamilies_cpm.tsv', header=0, index_col=0, sep='\t')
genes_h3_ko_50 = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann3/humann3_final_out/humann3_genefamilies_cpm_ko_50.tsv', header=0, index_col=0, sep='\t')
genes_h3_ko_90 = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann3/humann3_final_out/humann3_genefamilies_cpm_ko_90.tsv', header=0, index_col=0, sep='\t')
genes_h3_not_cpm = genes_filter(genes_h3_not_cpm)[1]
genes_h3 = genes_filter(genes_h3)[1]
genes_h3_ko_50 = genes_filter(genes_h3_ko_50)[1]
genes_h3_ko_90 = genes_filter(genes_h3_ko_90)[1]

snames_genes_h3, genes_h3_richness = get_richness(genes_h3)
snames_genes_h3_ko_50, genes_h3_richness_ko_50 = get_richness(genes_h3_ko_50)
snames_genes_h3_ko_90, genes_h3_richness_ko_90 = get_richness(genes_h3_ko_90)
snames_pways_h3, pways_h3_richness = get_richness(pathways_h3)

Ungrouped KEGG orthologs

Here I’ve used the built-in HUMAnN databases to map the Uniref gene families to KEGG orthologs, and this is a comparison of the proportion of reads that are ungrouped by using the mapping tables on the HUMAnN2 Uniref90 and Uniref50 databases and then the HUMAnN2 mapping tables separately on the same output for the Uniref90 and UNiref50 (even though these are combined in the HUMAnN2 output).

plt.figure(figsize=(8,6))
ax1 = plt.subplot(211)
ax2 = plt.subplot(212)

x = [a for a in range(len(genes_50_ko.columns))]
x1 = [a+0.4 for a in x]

ax1.bar(x, genes_90_ko.loc['UNGROUPED', :].values, width=0.4, color=colors, edgecolor='k')
ax1.bar(x1, genes_50_ko.loc['UNGROUPED', :].values, width=0.4, color=colors, edgecolor='k', alpha=0.6)
ax2.bar(x, genes_h3_ko_90.loc['UNGROUPED', :].values, width=0.4, color=colors, edgecolor='k')
ax2.bar(x1, genes_h3_ko_50.loc['UNGROUPED', :].values, width=0.4, color=colors, edgecolor='k', alpha=0.6)
plt.sca(ax1)
plt.xlim([-0.5,21]), plt.xticks([])
handles = [Patch(facecolor='k', edgecolor='k', label='Uniref90'), Patch(facecolor='k', edgecolor='k', alpha=0.6, label='Uniref50')]
ax1.legend(handles=handles, loc='upper left', bbox_to_anchor=(1,1.02))
plt.ylabel('Ungrouped\nKEGG orthologs (%)')
plt.ylim([0,100])
plt.sca(ax2)
plt.xlim([-0.5,21])
plt.ylim([0,100])
plt.ylabel('Ungrouped\nKEGG orthologs (%)')
plt.xticks(x1, sample_names, rotation=90)
plt.tight_layout()
plt.show()

Given how small the proportion of the Uniref gene families that have mapped to KEGG orthologs is, we will continue with the Uniref gene families, and we use the tables that have been normalised within HUMAnN to CPM.

# genes_50 = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann/processing/humann2_final_out_50/humann2_genefamilies.tsv', header=0, index_col=0, sep='\t')
# genes_50.drop('UNMAPPED', axis=0, inplace=True)
# gene_names = list(genes_50.index.values)
# keeping = []
# for gn in gene_names:
#     new_gn = gn.split('|')[0]
#     if gn == new_gn: keeping.append(True)
#     else: keeping.append(False)
# genes_50_abs = genes_50.loc[keeping, :]
# genes_50 = genes_50_abs.divide(genes_50_abs.sum(axis=0)).multiply(100)

# 
# snames_genes_50, genes_50_richness = get_richness(genes_50)
# snames_pways_50, pways_50_richness = get_richness(pathways_50)
# 
# genes_h3 = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann3/humann3_final_out/humann3_genefamilies_cpm.tsv', header=0, index_col=0, sep='\t')
# genes_h3.drop('UNMAPPED', axis=0, inplace=True)
# gene_names = list(genes_h3.index.values)
# keeping = []
# for gn in gene_names:
#     new_gn = gn.split('|')[0]
#     if gn == new_gn: keeping.append(True)
#     else: keeping.append(False)
# genes_abs = genes_h3.loc[keeping, :]
# genes_h3 = genes_abs.divide(genes_abs.sum(axis=0)).multiply(100)
# 
# snames_genes, genes_richness = get_richness(genes_h3)
# snames_pways, pways_richness = get_richness(pathways)

# genes_90 = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann/processing/humann2_final_out_90/humann2_genefamilies.tsv', header=0, index_col=0, sep='\t')
# genes_90.drop('UNMAPPED', axis=0, inplace=True)
# gene_names = list(genes_90.index.values)
# keeping = []
# for gn in gene_names:
#     new_gn = gn.split('|')[0]
#     if gn == new_gn: keeping.append(True)
#     else: keeping.append(False)
# genes_90_abs = genes_90.loc[keeping, :]
# genes_90 = genes_90_abs.divide(genes_90_abs.sum(axis=0)).multiply(100)

Gene family richness


plt.figure(figsize=(8,6))
ax1 = plt.subplot(211)
ax2 = plt.subplot(212)

x = [a for a in range(len(genes_90.columns))]
x1 = [a+0.3 for a in x]
x2 = [a+0.6 for a in x]

ax1.bar(x, genes_90_richness, width=0.3, color=colors, edgecolor='k')
ax2.bar(x, genes_90_richness, width=0.3, color=colors, edgecolor='k')
ax1.bar(x1[:len(genes_50_richness)], genes_50_richness, width=0.3, color=colors, edgecolor='k', alpha=0.7)
ax2.bar(x1[:len(genes_50_richness)], genes_50_richness, width=0.3, color=colors, edgecolor='k', alpha=0.7)
ax1.bar(x2, genes_h3_richness, width=0.3, color=colors, alpha=0.4, edgecolor='k')
ax2.bar(x2, genes_h3_richness, width=0.3, color=colors, alpha=0.4, edgecolor='k')
plt.sca(ax1)
plt.xlim([-0.5,21]), plt.xticks([])
handles = [Patch(facecolor='k', edgecolor='k', label='HUMAnN2 Uniref90'), Patch(facecolor='k', edgecolor='k', alpha=0.7, label='HUMAnN2 Uniref50'), Patch(facecolor='k', edgecolor='k', alpha=0.4, label='HUMAnN3')]
ax1.legend(handles=handles, loc='upper left', bbox_to_anchor=(1,1.02))
plt.ylabel('Gene family\nrichness')

plt.sca(ax2)
plt.semilogy()
plt.xlim([-0.5,21])
plt.ylabel('Gene family\nrichness (log scale)')
plt.xticks(x1, sample_names, rotation=90)
plt.tight_layout()
plt.show()

We can see here that the number of gene families is lower for HUMAnN3 than for either of the HUMAnN2 runs, aside from for the blood samples where we have higher richness for HUMAnN3 than Uniref90, but lower than Uniref50.

Pathway richness


plt.figure(figsize=(8,6))
ax1 = plt.subplot(211)
ax2 = plt.subplot(212)

x = [a for a in range(len(genes_90.columns))]
x1 = [a+0.3 for a in x]
x2 = [a+0.6 for a in x]

ax1.bar(x, pways_90_richness, width=0.3, color=colors, edgecolor='k')
ax1.bar(x1, pways_50_richness, width=0.3, color=colors, edgecolor='k', alpha=0.7)
ax2.bar(x, pways_90_richness, width=0.3, color=colors, edgecolor='k')
ax2.bar(x1, pways_50_richness, width=0.3, color=colors, edgecolor='k', alpha=0.7)
ax1.bar(x2, pways_h3_richness, width=0.3, color=colors, alpha=0.4, edgecolor='k')
ax2.bar(x2, pways_h3_richness, width=0.3, color=colors, alpha=0.4, edgecolor='k')
plt.sca(ax1)
plt.xticks([])
plt.ylabel('Pathway richness')
handles = [Patch(facecolor='k', edgecolor='k', label='HUMAnN2 Uniref90'), Patch(facecolor='k', edgecolor='k', alpha=0.7, label='HUMAnN2 Uniref50'), Patch(facecolor='k', edgecolor='k', alpha=0.4, label='HUMAnN3')]
ax1.legend(handles=handles, bbox_to_anchor=(1,1))
plt.xlim([-0.5,21])
plt.sca(ax2)
plt.semilogy()
plt.xlim([-0.5,21]), plt.xticks(x1, sample_names, rotation=90)
plt.ylabel('Pathway richness (log scale)')
plt.tight_layout()
plt.show()

Here we can see that HUMAnN3 gives us no pathways for the blood samples, and - as with the gene families - HUMAnN3 has lower numbers of pathways than either of the HUMAnN2 runs for all other samples.

HUMAnN nMDS plots

Gene families

plt.figure(figsize=(8,6))
ax1, ax2, ax3 = plt.subplot(221), plt.subplot(222), plt.subplot(223)
genes_50_t = genes_50.transpose()
pos_50, npos_50, nmds_stress_50 = transform_for_NMDS(genes_50_t)
genes_90_t = genes_90.transpose()
pos_90, npos_90, nmds_stress_90 = transform_for_NMDS(genes_90_t)
genes_h3_t = genes_h3.transpose()
pos_h3, npos_h3, nmds_stress_h3 = transform_for_NMDS(genes_h3_t)
for a in range(len(npos_50)):
   ax1.scatter(npos_50[a,0], npos_50[a,1], marker=shapes[a], color=colors[a], s=100)
   ax2.scatter(npos_90[a,0], npos_90[a,1], marker=shapes[a], color=colors[a], s=100)
   ax3.scatter(npos_h3[a,0], npos_h3[a,1], marker=shapes[a], color=colors[a], s=100)
ax1.set_xlabel('nMDS1'), ax2.set_xlabel('nMDS1'), ax3.set_xlabel('nMDS1')
ax1.set_ylabel('nMDS2'), ax2.set_ylabel('nMDS2'), ax3.set_ylabel('nMDS2')
ax1.set_title('HUMAnN2 Uniref50'), ax2.set_title('HUMAnN2 Uniref90'), ax3.set_title('HUMAnN3')
handles1 = [Patch(facecolor=colors_dict[color], edgecolor='k', label=color) for color in colors_dict]
handles2 = [Line2D([0], [0], marker=shapes_dict[shape], color='w', label=shape, markerfacecolor='k', markersize=15) for shape in shapes_dict]
plt.tight_layout()
ax3.legend(handles=handles1+handles2, loc='upper left', bbox_to_anchor=(1,1.02))
plt.show()

KEGG orthologs

plt.figure(figsize=(8,6))
ax1, ax2, ax3, ax4 = plt.subplot(221), plt.subplot(222), plt.subplot(223), plt.subplot(224)
genes_50_ko_t = genes_50_ko.transpose()
pos_50_ko, npos_50_ko, nmds_stress_50_ko = transform_for_NMDS(genes_50_ko_t)
genes_90_ko_t = genes_90_ko.transpose()
pos_90_ko, npos_90_ko, nmds_stress_90_ko = transform_for_NMDS(genes_90_ko_t)
genes_h3_ko_50_t = genes_h3_ko_50.transpose()
pos_h3_ko_50, npos_h3_ko_50, nmds_stress_h3_ko_50 = transform_for_NMDS(genes_h3_ko_50_t)
genes_h3_ko_90_t = genes_h3_ko_90.transpose()
pos_h3_ko_90, npos_h3_ko_90, nmds_stress_h3_ko_90 = transform_for_NMDS(genes_h3_ko_90_t)
for a in range(len(npos_50)):
   ax1.scatter(npos_50_ko[a,0], npos_50_ko[a,1], marker=shapes[a], color=colors[a], s=100)
   ax2.scatter(npos_90_ko[a,0], npos_90_ko[a,1], marker=shapes[a], color=colors[a], s=100)
   ax3.scatter(npos_h3_ko_50[a,0], npos_h3_ko_50[a,1], marker=shapes[a], color=colors[a], s=100)
   ax4.scatter(npos_h3_ko_90[a,0], npos_h3_ko_90[a,1], marker=shapes[a], color=colors[a], s=100)
ax1.set_xlabel('nMDS1'), ax2.set_xlabel('nMDS1'), ax3.set_xlabel('nMDS1'), ax4.set_xlabel('nMDS1')
ax1.set_ylabel('nMDS2'), ax2.set_ylabel('nMDS2'), ax3.set_ylabel('nMDS2'), ax4.set_ylabel('nMDS2')
ax1.set_title('HUMAnN2 Uniref50'), ax2.set_title('HUMAnN2 Uniref90'), ax3.set_title('HUMAnN3 Uniref50'), ax4.set_title('HUMAnN3 Uniref90'), 
handles1 = [Patch(facecolor=colors_dict[color], edgecolor='k', label=color) for color in colors_dict]
handles2 = [Line2D([0], [0], marker=shapes_dict[shape], color='w', label=shape, markerfacecolor='k', markersize=15) for shape in shapes_dict]
ax4.legend(handles=handles1+handles2, loc='upper left', bbox_to_anchor=(1,1.02))
plt.tight_layout()
plt.show()

HUMAnN2 differential abundance between body sites

Now we are looking at the gene families, pathways and KEGG orthologs that are differentially abundant between body sites, using the lists that are output by HUMAnN2.

def get_diff_abundant(genes, name_csv, fp= '/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann/processing/'):
    col_names = list(genes.columns)
    col_names_dict = {}
    for cn in col_names:
        if 'SRR8595488' in cn:
          genes.drop([cn], axis=1, inplace=True)
        cn_new = cn.split('_')[0]
        col_names_dict[cn] = site_dict[cn_new]
    new_genes = genes.rename(columns=col_names_dict)
    comparisons = [['Saliva', 'Blood'], ['Buccal', 'Blood'], ['Saliva', 'Buccal'], ['Saliva', 'Saliva_euk'], ['Buccal', 'Buccal_euk'], ['Blood', 'Saliva_euk'], ['Blood', 'Buccal_euk']]
    fig = plt.figure(figsize=(15,8))
    ax = [plt.subplot2grid((2,8), (0,1), colspan=2), plt.subplot2grid((2,8), (0,3), colspan=2), plt.subplot2grid((2,8), (0,5), colspan=2), plt.subplot2grid((2,8), (1,0), colspan=2), plt.subplot2grid((2,8), (1,2), colspan=2), plt.subplot2grid((2,8), (1,4), colspan=2), plt.subplot2grid((2,8), (1,6), colspan=2)]
    for a in range(len(comparisons)):
        ax[a].set_title(comparisons[a][0]+r' $vs$ '+comparisons[a][1])
        this_comparison = new_genes.loc[:, comparisons[a]]
        this_comparison = this_comparison[this_comparison.max(axis=1) > 0]
        genes_comp = list(this_comparison.index.values)
        fc, pval, colors_p, this_fc = [], [], [], []
        for b in range(len(genes_comp)):
            c1, c2 = list(this_comparison.loc[genes_comp[b], comparisons[a][0]]), list(this_comparison.loc[genes_comp[b], comparisons[a][1]])
            
            c1, c2 = [0.000001 if x == 0 else x for x in c1], [0.000001 if x == 0 else x for x in c2]
            c1, c2 = [math.log2(c1[c]) for c in range(len(c1))], [math.log2(c2[c]) for c in range(len(c2))]
            t, p = stats.ttest_ind(c1, c2)
            c1, c2 = np.median(c1), np.median(c2)
            diff = c2-c1
            if diff < 1 and diff > 0: diff = -(1/diff)
            fc.append(diff), pval.append(p)
            this_fc.append([genes_comp[b], diff, p])
        colors_p_all = [colors_dict[comparisons[a][0]], colors_dict[comparisons[a][1]]]
        for c in range(len(fc)):
            if fc[c] >= 2 and pval[c] <= 0.01: colors_p.append(colors_p_all[1])
            elif fc[c] <= -2 and pval[c] <= 0.01: colors_p.append(colors_p_all[0])
            else: colors_p.append('#B1B1B1')
            pval[c] = -math.log10(pval[c])
        com = comparisons[a][0]+'_'+comparisons[a][1]
        if a == 0:
          df_pval = pd.DataFrame(this_fc, columns=['Genes/pathways', com+'_FC', com+'_p'])
          df_pval.set_index('Genes/pathways', inplace=True)
        else:
          new_df = pd.DataFrame(this_fc, columns=['Genes/pathways', com+'_FC', com+'_p'])
          new_df.set_index('Genes/pathways', inplace=True)
          df_pval = pd.concat([df_pval, new_df], axis=1, join='outer')
        ma = max([max(fc), abs(min(fc))])+0.5
        if ma > 10: ma = 10.5
        ax[a].set_xlim([-ma, ma])
        ax[a].scatter(fc, pval, marker='o', c=colors_p, s=10)
        ax[a].set_xlabel(r'Log$_2$ fold change')
        if a == 0 or a == 3:
          ax[a].set_ylabel('-log(p value)')
        plt.sca(ax[a])
    df_pval.to_csv(fp+name_csv+'.csv')
    handles = [Patch(facecolor=colors_dict[color], edgecolor='k', label=color) for color in colors_dict]
    ax[2].legend(handles=handles, bbox_to_anchor=(1.04,1), loc='upper left')
    return

Number of differentially abundant pathways between body sites (Uniref90)

Volcano plot showing the number of differentially abundant pathways between body sites. Colored points denote genes that are significantly differentially abundant (T-test p<0.01 and log2 (median) FC > 2 or < -2) and they are colored by which body site they are highest in abundance in. Note that very large fold changes are not shown. Initial results showed that there were very few genes with > 10 or < -10, so the maximum plotted is this.

pways = pd.DataFrame(pathways_90)
get_diff_abundant(pways, 'pathways_90')
plt.subplots_adjust(hspace=0.5, wspace=0.5)
plt.show()

Number of differentially abundant pathways between body sites (Uniref50)

Volcano plot showing the number of differentially abundant pathways between body sites. Colored points denote genes that are significantly differentially abundant (T-test p<0.01 and log2 (median) FC > 2 or < -2) and they are colored by which body site they are highest in abundance in. Note that very large fold changes are not shown. Initial results showed that there were very few genes with > 10 or < -10, so the maximum plotted is this.

pways = pd.DataFrame(pathways_50)
get_diff_abundant(pways, 'pathways_50')
plt.subplots_adjust(hspace=0.5, wspace=0.5)
plt.show()

Number of differentially abundant gene families between body sites (Uniref90)

Volcano plot showing the number of differentially abundant gene families between body sites. Colored points denote genes that are significantly differentially abundant (T-test p<0.01 and log2 (median) FC > 2 or < -2) and they are colored by which body site they are highest in abundance in. Note that very large fold changes are not shown. Initial results showed that there were very few genes with > 10 or < -10, so the maximum plotted is this.

pways = pd.DataFrame(genes_90)
get_diff_abundant(pways, 'genes_90')
plt.subplots_adjust(hspace=0.5, wspace=0.5)
plt.show()

Number of differentially abundant gene families between body sites (Uniref50)

Volcano plot showing the number of differentially abundant gene families between body sites. Colored points denote genes that are significantly differentially abundant (T-test p<0.01 and log2 (median) FC > 2 or < -2) and they are colored by which body site they are highest in abundance in. Note that very large fold changes are not shown. Initial results showed that there were very few genes with > 10 or < -10, so the maximum plotted is this.

pways = pd.DataFrame(genes_50)
get_diff_abundant(pways, 'genes_50')
plt.subplots_adjust(hspace=0.5, wspace=0.5)
plt.show()

Number of differentially abundant KEGG orthologs between body sites (Uniref90)

Volcano plot showing the number of differentially abundant gene families between body sites. Colored points denote genes that are significantly differentially abundant (T-test p<0.01 and log2 (median) FC > 2 or < -2) and they are colored by which body site they are highest in abundance in. Note that very large fold changes are not shown. Initial results showed that there were very few genes with > 10 or < -10, so the maximum plotted is this.

pways = pd.DataFrame(genes_90_ko)
get_diff_abundant(pways, 'genes_90_ko')
plt.subplots_adjust(hspace=0.5, wspace=0.5)
plt.show()

Number of differentially abundant KEGG orthologs between body sites (Uniref50)

Volcano plot showing the number of differentially abundant KEGG orthologs between body sites. Colored points denote genes that are significantly differentially abundant (T-test p<0.01 and log2 (median) FC > 2 or < -2) and they are colored by which body site they are highest in abundance in. Note that very large fold changes are not shown. Initial results showed that there were very few genes with > 10 or < -10, so the maximum plotted is this.

pways = pd.DataFrame(genes_50_ko)
get_diff_abundant(pways, 'genes_50_ko')
plt.subplots_adjust(hspace=0.5, wspace=0.5)
plt.show()

HUMAnN3 differential abundance between body sites

Now we are looking at the gene families and pathways that are differentially abundant between body sites, using the lists that are output by HUMAnN3.

Number of differentially abundant pathways between body sites

Volcano plot showing the number of differentially abundant pathways between body sites. Colored points denote genes that are significantly differentially abundant (T-test p<0.01 and log2 (median) FC > 2 or < -2) and they are colored by which body site they are highest in abundance in. Note that very large fold changes are not shown. Initial results showed that there were very few genes with > 10 or < -10, so the maximum plotted is this.

pways = pd.DataFrame(pathways_h3)
get_diff_abundant(pways, 'pathways', fp = '/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann3/')
plt.subplots_adjust(hspace=0.5, wspace=0.5)
plt.show()

Number of differentially abundant gene families between body sites

Volcano plot showing the number of differentially abundant pathways between body sites. Colored points denote genes that are significantly differentially abundant (T-test p<0.01 and log2 (median) FC > 2 or < -2) and they are colored by which body site they are highest in abundance in. Note that very large fold changes are not shown. Initial results showed that there were very few genes with > 10 or < -10, so the maximum plotted is this.

pways = pd.DataFrame(genes_h3)
get_diff_abundant(pways, 'genes', fp = '/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann3/')
plt.subplots_adjust(hspace=0.5, wspace=0.5)
plt.show()

Number of differentially abundant KEGG orthologs between body sites (Uniref90)

Volcano plot showing the number of differentially abundant pathways between body sites. Colored points denote genes that are significantly differentially abundant (T-test p<0.01 and log2 (median) FC > 2 or < -2) and they are colored by which body site they are highest in abundance in. Note that very large fold changes are not shown. Initial results showed that there were very few genes with > 10 or < -10, so the maximum plotted is this.

Note that now we have a separation of Uniref90 and Uniref50 because this is what the mapping files do. I’m not sure yet if it’s appropriate to just combine these two tables or now.

pways = pd.DataFrame(genes_h3_ko_90)
get_diff_abundant(pways, 'genes_ko_90', fp = '/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann3/')
plt.subplots_adjust(hspace=0.5, wspace=0.5)
plt.show()

Number of differentially abundant KEGG orthologs between body sites (Uniref50)

Volcano plot showing the number of differentially abundant pathways between body sites. Colored points denote genes that are significantly differentially abundant (T-test p<0.01 and log2 (median) FC > 2 or < -2) and they are colored by which body site they are highest in abundance in. Note that very large fold changes are not shown. Initial results showed that there were very few genes with > 10 or < -10, so the maximum plotted is this.

Note that now we have a separation of Uniref90 and Uniref50 because this is what the mapping files do. I’m not sure yet if it’s appropriate to just combine these two tables or now.

pways = pd.DataFrame(genes_h3_ko_50)
get_diff_abundant(pways, 'genes_ko_50', fp = '/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann3/')
plt.subplots_adjust(hspace=0.5, wspace=0.5)
plt.show()

HUMAnN most differentially abundant pathways

For each of these, I am initially filtering out to only P values that are below 0.01, and then am taking only the most up- or down-regulated pathways (50 of each) and printing them to a table.

HUMAnN2

For HUMAnN2 I am now looking at the pathways assigned to the Uniref50 database.

pathways = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann/processing/pathways_50.csv', header=0, index_col=0)

Blood vs saliva

Negative fold changes are higher in saliva samples while positive are higher in blood samples.

bs = pathways.loc[:, ['Saliva_Blood_FC', 'Saliva_Blood_p']]
bs = bs[bs['Saliva_Blood_p'] < 0.01]
bs = bs.sort_values(by=['Saliva_Blood_FC'], ascending=False).rename(columns={'Saliva_Blood_FC':'Log2 fold change', 'Saliva_Blood_p':'p'})
bs = pd.concat([bs.head(n=50), bs.tail(n=50)])
bs.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bs %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
PWY-6748: nitrate reduction VII (denitrification) 3.353935 0.0050250
PWY66-388: fatty acid &alpha;-oxidation III 2.359026 0.0008840
PWY-5304: superpathway of sulfur oxidation (Acidianus ambivalens) 2.298469 0.0007984
PWY-7090: UDP-2,3-diacetamido-2,3-dideoxy-&alpha;-D-mannuronate biosynthesis 1.992666 0.0000075
LIPASYN-PWY: phospholipases 1.121379 0.0027953
PWY-6309: L-tryptophan degradation XI (mammalian, via kynurenine) 1.064014 0.0000213
PWY-7433: mucin core 1 and core 2 O-glycosylation -1.003870 0.0030829
URSIN-PWY: ureide biosynthesis -1.125074 0.0005477
PWY-5067: glycogen biosynthesis II (from UDP-D-Glucose) -1.177346 0.0000897
THREOCAT-PWY: superpathway of L-threonine metabolism -1.222813 0.0000033
RHAMCAT-PWY: L-rhamnose degradation I -1.225334 0.0065949
FUC-RHAMCAT-PWY: superpathway of fucose and rhamnose degradation -1.462906 0.0028178
GALACTUROCAT-PWY: D-galacturonate degradation I -1.599911 0.0093617
PWY-6344: L-ornithine degradation II (Stickland reaction) -1.650884 0.0001819
PWY-7039: phosphatidate metabolism, as a signaling molecule -1.752074 0.0017477
ARGININE-SYN4-PWY: L-ornithine de novo biosynthesis -1.803634 0.0004227
PWY-622: starch biosynthesis -1.830364 0.0060239
PWY-5392: reductive TCA cycle II -1.871932 0.0011918
PWY-2201: folate transformations I -1.900549 0.0000170
P105-PWY: TCA cycle IV (2-oxoglutarate decarboxylase) -1.921687 0.0073514
FUCCAT-PWY: fucose degradation -1.968721 0.0046833
PWY-6630: superpathway of L-tyrosine biosynthesis -2.047251 0.0001833
PWY-6590: superpathway of Clostridium acetobutylicum acidogenic fermentation -2.052614 0.0068845
PWY0-42: 2-methylcitrate cycle I -2.058113 0.0037055
PWY-6823: molybdenum cofactor biosynthesis -2.141908 0.0003348
PWY-5747: 2-methylcitrate cycle II -2.206359 0.0027051
TCA-GLYOX-BYPASS: superpathway of glyoxylate bypass and TCA -2.265081 0.0006474
PWY-5177: glutaryl-CoA degradation -2.330080 0.0006336
PWY-6708: ubiquinol-8 biosynthesis (prokaryotic) -2.343357 0.0090424
PWY-5857: ubiquinol-10 biosynthesis (prokaryotic) -2.343357 0.0090424
PWY-5856: ubiquinol-9 biosynthesis (prokaryotic) -2.343357 0.0090424
PWY-5855: ubiquinol-7 biosynthesis (prokaryotic) -2.343357 0.0090424
DENITRIFICATION-PWY: nitrate reduction I (denitrification) -2.395529 0.0000040
PWY-6749: CMP-legionaminate biosynthesis I -2.448271 0.0020433
P162-PWY: L-glutamate degradation V (via hydroxyglutarate) -2.467260 0.0044743
CITRULBIO-PWY: L-citrulline biosynthesis -2.523788 0.0034968
PWY-821: superpathway of sulfur amino acid biosynthesis (Saccharomyces cerevisiae) -2.714446 0.0013771
COBALSYN-PWY: adenosylcobalamin salvage from cobinamide I -2.748948 0.0011485
PWY-6891: thiazole biosynthesis II (Bacillus) -2.765335 0.0006256
PWY-7003: glycerol degradation to butanol -2.827942 0.0099335
PWY-7371: 1,4-dihydroxy-6-naphthoate biosynthesis II -2.884515 0.0032543
PWY-5104: L-isoleucine biosynthesis IV -2.983969 0.0048673
PWY-5384: sucrose degradation IV (sucrose phosphorylase) -2.985100 0.0001422
PWY-5088: L-glutamate degradation VIII (to propanoate) -3.049735 0.0034401
PWY-6892: thiazole biosynthesis I (E. coli) -3.057563 0.0003621
PWY-6263: superpathway of menaquinol-8 biosynthesis II -3.136512 0.0009763
PWY-7198: pyrimidine deoxyribonucleotides de novo biosynthesis IV -3.140996 0.0069799
P164-PWY: purine nucleobases degradation I (anaerobic) -3.185865 0.0007588
GLYCOLYSIS-TCA-GLYOX-BYPASS: superpathway of glycolysis, pyruvate dehydrogenase, TCA, and glyoxylate bypass -3.194745 0.0000566
PWY-6549: L-glutamine biosynthesis III -3.214460 0.0000285
ARO-PWY: chorismate biosynthesis I -6.932112 0.0000010
ANAEROFRUCAT-PWY: homolactic fermentation -6.944961 0.0000022
PWY-5103: L-isoleucine biosynthesis III -6.947366 0.0000008
COMPLETE-ARO-PWY: superpathway of aromatic amino acid biosynthesis -6.951019 0.0000009
BRANCHED-CHAIN-AA-SYN-PWY: superpathway of branched amino acid biosynthesis -6.974922 0.0000009
CALVIN-PWY: Calvin-Benson-Bassham cycle -6.977160 0.0000012
PWY-7220: adenosine deoxyribonucleotides de novo biosynthesis II -7.010723 0.0000002
PWY-7222: guanosine deoxyribonucleotides de novo biosynthesis II -7.010723 0.0000002
PWY-5097: L-lysine biosynthesis VI -7.026101 0.0000009
PWY-7539: 6-hydroxymethyl-dihydropterin diphosphate biosynthesis III (Chlamydia) -7.030599 0.0000066
DTDPRHAMSYN-PWY: dTDP-L-rhamnose biosynthesis I -7.038235 0.0000013
PWY-6163: chorismate biosynthesis from 3-dehydroquinate -7.045429 0.0000015
PWY-7208: superpathway of pyrimidine nucleobases salvage -7.046760 0.0000005
DENOVOPURINE2-PWY: superpathway of purine nucleotides de novo biosynthesis II -7.048013 0.0000007
VALSYN-PWY: L-valine biosynthesis -7.080156 0.0000007
ILEUSYN-PWY: L-isoleucine biosynthesis I (from threonine) -7.080156 0.0000007
PWY-7111: pyruvate fermentation to isobutanol (engineered) -7.085083 0.0000005
PWY-2942: L-lysine biosynthesis III -7.099831 0.0000010
PWY-5484: glycolysis II (from fructose 6-phosphate) -7.100485 0.0000017
PWY-5100: pyruvate fermentation to acetate and lactate II -7.112480 0.0000013
TRNA-CHARGING-PWY: tRNA charging -7.114433 0.0000019
PWY-6125: superpathway of guanosine nucleotides de novo biosynthesis II -7.115824 0.0000005
NONMEVIPP-PWY: methylerythritol phosphate pathway I -7.145606 0.0000027
PWY-7184: pyrimidine deoxyribonucleotides de novo biosynthesis I -7.148419 0.0000004
PWY-841: superpathway of purine nucleotides de novo biosynthesis I -7.160267 0.0000007
PWY-6121: 5-aminoimidazole ribonucleotide biosynthesis I -7.177065 0.0000006
PWY-3841: folate transformations II -7.183704 0.0000020
PWY-6147: 6-hydroxymethyl-dihydropterin diphosphate biosynthesis I -7.195526 0.0000039
PWY-6277: superpathway of 5-aminoimidazole ribonucleotide biosynthesis -7.203456 0.0000003
PWY-6122: 5-aminoimidazole ribonucleotide biosynthesis II -7.203456 0.0000003
PWY-7228: superpathway of guanosine nucleotides de novo biosynthesis I -7.209498 0.0000005
PWY0-166: superpathway of pyrimidine deoxyribonucleotides de novo biosynthesis (E. coli) -7.212900 0.0000006
PWY-6385: peptidoglycan biosynthesis III (mycobacteria) -7.219420 0.0000023
COA-PWY-1: coenzyme A biosynthesis II (mammalian) -7.223528 0.0000009
1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis -7.229362 0.0000017
PWY-5686: UMP biosynthesis -7.260887 0.0000012
GLYCOLYSIS: glycolysis I (from glucose 6-phosphate) -7.272773 0.0000014
PWY-5695: urate biosynthesis/inosine 5’-phosphate degradation -7.277619 0.0000019
PWY66-400: glycolysis VI (metazoan) -7.313957 0.0000012
PWY-6151: S-adenosyl-L-methionine cycle I -7.319377 0.0000015
ANAGLYCOLYSIS-PWY: glycolysis III (from glucose) -7.328932 0.0000009
PWY-6700: queuosine biosynthesis -7.332738 0.0000019
PWY-6126: superpathway of adenosine nucleotides de novo biosynthesis II -7.376218 0.0000004
PWY-7221: guanosine ribonucleotides de novo biosynthesis -7.378509 0.0000010
PWY-1042: glycolysis IV (plant cytosol) -7.447206 0.0000013
PEPTIDOGLYCANSYN-PWY: peptidoglycan biosynthesis I (meso-diaminopimelate containing) -7.494650 0.0000013
PWY-6387: UDP-N-acetylmuramoyl-pentapeptide biosynthesis I (meso-diaminopimelate containing) -7.502355 0.0000012
PWY-6386: UDP-N-acetylmuramoyl-pentapeptide biosynthesis II (lysine-containing) -7.507127 0.0000010
PWY-7229: superpathway of adenosine nucleotides de novo biosynthesis I -7.541532 0.0000004
PWY-7219: adenosine ribonucleotides de novo biosynthesis -7.794523 0.0000008

Blood vs buccal

Negative fold changes are higher in buccal samples while positive are higher in blood samples.

bb = pathways.loc[:, ['Buccal_Blood_FC', 'Buccal_Blood_p']]
bb = bb[bb['Buccal_Blood_p'] < 0.01]
bb= bb.sort_values(by=['Buccal_Blood_FC'], ascending=False).rename(columns={'Buccal_Blood_FC':'Log2 fold change', 'Buccal_Blood_p':'p'})
bb = pd.concat([bb.head(n=50), bb.tail(n=50)])
bb.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bb %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
PWY-3781: aerobic respiration I (cytochrome c) 1.810779 0.0033223
PWY-7007: methyl ketone biosynthesis 1.424266 0.0000012
GLYCOLYSIS-TCA-GLYOX-BYPASS: superpathway of glycolysis, pyruvate dehydrogenase, TCA, and glyoxylate bypass -1.395076 0.0088227
SO4ASSIM-PWY: sulfate reduction I (assimilatory) -1.614670 0.0004843
PWY-6969: TCA cycle V (2-oxoglutarate:ferredoxin oxidoreductase) -1.750007 0.0070212
PWY-5840: superpathway of menaquinol-7 biosynthesis -1.808867 0.0031228
PWY-7196: superpathway of pyrimidine ribonucleosides salvage -1.853108 0.0030499
P108-PWY: pyruvate fermentation to propanoate I -1.853532 0.0005303
HISDEG-PWY: L-histidine degradation I -1.919521 0.0004811
PWY-5920: superpathway of heme biosynthesis from glycine -2.061224 0.0000282
GLYCOCAT-PWY: glycogen degradation I (bacterial) -2.074745 0.0003171
TCA: TCA cycle I (prokaryotic) -2.141814 0.0014856
PWY-5690: TCA cycle II (plants and fungi) -2.183025 0.0006881
PWY-6901: superpathway of glucose and xylose degradation -2.187600 0.0019796
SULFATE-CYS-PWY: superpathway of sulfate assimilation and cysteine biosynthesis -2.301408 0.0000383
NAGLIPASYN-PWY: lipid IVA biosynthesis -2.311088 0.0018788
P125-PWY: superpathway of (R,R)-butanediol biosynthesis -2.337138 0.0047167
PWY-6628: superpathway of L-phenylalanine biosynthesis -2.354140 0.0019767
PWY66-398: TCA cycle III (animals) -2.390681 0.0002442
GLYCOLYSIS-E-D: superpathway of glycolysis and Entner-Doudoroff -2.396497 0.0001922
PYRIDOXSYN-PWY: pyridoxal 5’-phosphate biosynthesis I -2.518405 0.0004261
PWY-7013: L-1,2-propanediol degradation -2.568330 0.0056055
PWY-7115: C4 photosynthetic carbon assimilation cycle, NAD-ME type -2.699176 0.0029609
PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I -2.717392 0.0002948
PWY-5345: superpathway of L-methionine biosynthesis (by sulfhydrylation) -2.744253 0.0000224
HEME-BIOSYNTHESIS-II: heme biosynthesis I (aerobic) -2.745087 0.0000196
PWY-7211: superpathway of pyrimidine deoxyribonucleotides de novo biosynthesis -2.752876 0.0000002
PWY0-1061: superpathway of L-alanine biosynthesis -2.788405 0.0028952
ARGORNPROST-PWY: arginine, ornithine and proline interconversion -2.811587 0.0001781
PWY-6383: mono-trans, poly-cis decaprenyl phosphate biosynthesis -2.812851 0.0008988
PWY-5863: superpathway of phylloquinol biosynthesis -2.856290 0.0000619
P185-PWY: formaldehyde assimilation III (dihydroxyacetone cycle) -2.861928 0.0053614
PWY66-389: phytol degradation -2.924220 0.0001742
PWY-4981: L-proline biosynthesis II (from arginine) -2.925803 0.0000654
PWY-5464: superpathway of cytosolic glycolysis (plants), pyruvate dehydrogenase and TCA cycle -2.936548 0.0006690
PWY-5384: sucrose degradation IV (sucrose phosphorylase) -2.955020 0.0028961
PWY-5791: 1,4-dihydroxy-2-naphthoate biosynthesis II (plants) -2.955055 0.0001177
PWY-5837: 1,4-dihydroxy-2-naphthoate biosynthesis I -2.955055 0.0001177
P122-PWY: heterolactic fermentation -3.030378 0.0041709
PWY0-845: superpathway of pyridoxal 5’-phosphate biosynthesis and salvage -3.046322 0.0001500
PWY-5189: tetrapyrrole biosynthesis II (from glycine) -3.122875 0.0002898
PWY-5918: superpathay of heme biosynthesis from glutamate -3.146932 0.0000004
GLUTORN-PWY: L-ornithine biosynthesis -3.237653 0.0014052
PWY-5154: L-arginine biosynthesis III (via N-acetyl-L-citrulline) -3.244932 0.0020478
ARG+POLYAMINE-SYN: superpathway of arginine and polyamine biosynthesis -3.264731 0.0037416
PWY-6897: thiamin salvage II -3.270383 0.0001049
PWY-6527: stachyose degradation -3.270419 0.0004801
PWY-5989: stearate biosynthesis II (bacteria and plants) -3.274229 0.0001892
PWY-7279: aerobic respiration II (cytochrome c) (yeast) -3.281298 0.0000000
P461-PWY: hexitol fermentation to lactate, formate, ethanol and acetate -3.317485 0.0019796
PWY66-422: D-galactose degradation V (Leloir pathway) -5.533690 0.0030667
PWY-6317: galactose degradation I (Leloir pathway) -5.533690 0.0040000
PWY0-1296: purine ribonucleosides degradation -5.534295 0.0001734
PWY-2942: L-lysine biosynthesis III -5.565221 0.0000109
LACTOSECAT-PWY: lactose and galactose degradation I -5.567233 0.0025906
PWY-5686: UMP biosynthesis -5.574031 0.0000801
ARO-PWY: chorismate biosynthesis I -5.588917 0.0001164
THRESYN-PWY: superpathway of L-threonine biosynthesis -5.594116 0.0000734
PWY-7199: pyrimidine deoxyribonucleosides salvage -5.595006 0.0048719
PWY-3841: folate transformations II -5.628953 0.0000703
UDPNAGSYN-PWY: UDP-N-acetyl-D-glucosamine biosynthesis I -5.638122 0.0001103
COMPLETE-ARO-PWY: superpathway of aromatic amino acid biosynthesis -5.642542 0.0000879
COA-PWY-1: coenzyme A biosynthesis II (mammalian) -5.669929 0.0000608
PWY-841: superpathway of purine nucleotides de novo biosynthesis I -5.670172 0.0000479
PWY-6385: peptidoglycan biosynthesis III (mycobacteria) -5.704447 0.0000689
PWY-6700: queuosine biosynthesis -5.705070 0.0000935
PWY-3001: superpathway of L-isoleucine biosynthesis I -5.706412 0.0000531
PEPTIDOGLYCANSYN-PWY: peptidoglycan biosynthesis I (meso-diaminopimelate containing) -5.707573 0.0000435
PWY66-400: glycolysis VI (metazoan) -5.714766 0.0000467
PWY-6387: UDP-N-acetylmuramoyl-pentapeptide biosynthesis I (meso-diaminopimelate containing) -5.721121 0.0000587
PWY-6151: S-adenosyl-L-methionine cycle I -5.725540 0.0000531
PWY0-1319: CDP-diacylglycerol biosynthesis II -5.756408 0.0001248
PWY-6386: UDP-N-acetylmuramoyl-pentapeptide biosynthesis II (lysine-containing) -5.769150 0.0000592
DENOVOPURINE2-PWY: superpathway of purine nucleotides de novo biosynthesis II -5.776011 0.0000736
PWY-6737: starch degradation V -5.788743 0.0016341
PWY-1042: glycolysis IV (plant cytosol) -5.788916 0.0000241
ANAGLYCOLYSIS-PWY: glycolysis III (from glucose) -5.791165 0.0000196
PWY-5103: L-isoleucine biosynthesis III -5.808592 0.0000335
BRANCHED-CHAIN-AA-SYN-PWY: superpathway of branched amino acid biosynthesis -5.809792 0.0000297
PWY-7187: pyrimidine deoxyribonucleotides de novo biosynthesis II -5.835531 0.0000828
PWY-7197: pyrimidine deoxyribonucleotide phosphorylation -5.853169 0.0000647
PWY-7208: superpathway of pyrimidine nucleobases salvage -5.879577 0.0000531
PWY-7221: guanosine ribonucleotides de novo biosynthesis -5.930624 0.0000170
PWY-6125: superpathway of guanosine nucleotides de novo biosynthesis II -5.954771 0.0000291
PWY0-166: superpathway of pyrimidine deoxyribonucleotides de novo biosynthesis (E. coli) -5.966907 0.0000549
PWY-6147: 6-hydroxymethyl-dihydropterin diphosphate biosynthesis I -5.981678 0.0000161
PWY-7184: pyrimidine deoxyribonucleotides de novo biosynthesis I -6.009680 0.0000506
PWY-7228: superpathway of guanosine nucleotides de novo biosynthesis I -6.014350 0.0000246
PWY-6121: 5-aminoimidazole ribonucleotide biosynthesis I -6.034025 0.0000437
ILEUSYN-PWY: L-isoleucine biosynthesis I (from threonine) -6.034119 0.0000277
VALSYN-PWY: L-valine biosynthesis -6.034119 0.0000277
PWY-6122: 5-aminoimidazole ribonucleotide biosynthesis II -6.049958 0.0000323
PWY-6277: superpathway of 5-aminoimidazole ribonucleotide biosynthesis -6.049958 0.0000323
PWY-7222: guanosine deoxyribonucleotides de novo biosynthesis II -6.101319 0.0000471
PWY-7220: adenosine deoxyribonucleotides de novo biosynthesis II -6.101319 0.0000471
PWY-7111: pyruvate fermentation to isobutanol (engineered) -6.269354 0.0000386
PWY-6609: adenine and adenosine salvage III -6.345057 0.0006892
PWY-6126: superpathway of adenosine nucleotides de novo biosynthesis II -6.359716 0.0000637
PWY-7229: superpathway of adenosine nucleotides de novo biosynthesis I -6.446709 0.0000598
PWY-7219: adenosine ribonucleotides de novo biosynthesis -6.623166 0.0000374

Saliva vs buccal

Negative fold changes are higher in saliva samples while positive are higher in buccal samples.

sb = pathways.loc[:, ['Saliva_Buccal_FC', 'Saliva_Buccal_p']]
sb = sb[sb['Saliva_Buccal_p'] < 0.01]
sb= sb.sort_values(by=['Saliva_Buccal_FC'], ascending=False).rename(columns={'Saliva_Buccal_FC':'Log2 fold change', 'Saliva_Buccal_p':'p'})
sb = pd.concat([sb.head(n=50), sb.tail(n=50)])
sb.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$sb %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
PWY66-388: fatty acid &alpha;-oxidation III 2.359026 0.0008840
PWY-5304: superpathway of sulfur oxidation (Acidianus ambivalens) 2.298469 0.0018401
PWY-6309: L-tryptophan degradation XI (mammalian, via kynurenine) 1.303619 0.0015578
LIPASYN-PWY: phospholipases 1.121379 0.0027953
PWY-5067: glycogen biosynthesis II (from UDP-D-Glucose) -1.056819 0.0001084
URSIN-PWY: ureide biosynthesis -1.125074 0.0005477
THREOCAT-PWY: superpathway of L-threonine metabolism -1.222813 0.0000033
GLYCOCAT-PWY: glycogen degradation I (bacterial) -1.319259 0.0087915
PWY0-781: aspartate superpathway -1.450322 0.0032561
FUC-RHAMCAT-PWY: superpathway of fucose and rhamnose degradation -1.462906 0.0028178
DAPLYSINESYN-PWY: L-lysine biosynthesis I -1.475467 0.0050118
ARGSYNBSUB-PWY: L-arginine biosynthesis II (acetyl cycle) -1.530598 0.0098398
PWY-7279: aerobic respiration II (cytochrome c) (yeast) -1.584070 0.0014933
GALACTUROCAT-PWY: D-galacturonate degradation I -1.599911 0.0093617
PWY-6344: L-ornithine degradation II (Stickland reaction) -1.650884 0.0001819
RHAMCAT-PWY: L-rhamnose degradation I -1.670477 0.0096442
ARGORNPROST-PWY: arginine, ornithine and proline interconversion -1.680905 0.0084875
PWY-6859: all-trans-farnesol biosynthesis -1.700749 0.0090943
PWY-5464: superpathway of cytosolic glycolysis (plants), pyruvate dehydrogenase and TCA cycle -1.740130 0.0054514
PWY66-398: TCA cycle III (animals) -1.800719 0.0020909
PWY0-162: superpathway of pyrimidine ribonucleotides de novo biosynthesis -1.807031 0.0063393
PWY-7383: anaerobic energy metabolism (invertebrates, cytosol) -1.837946 0.0056222
PWY4FS-8: phosphatidylglycerol biosynthesis II (non-plastidic) -1.848239 0.0085834
PWY-5392: reductive TCA cycle II -1.871932 0.0034699
PWY-5097: L-lysine biosynthesis VI -1.872334 0.0050720
ARGSYN-PWY: L-arginine biosynthesis I (via L-ornithine) -1.915180 0.0076239
GLYCOGENSYNTH-PWY: glycogen biosynthesis I (from ADP-D-Glucose) -1.932810 0.0099337
PWY-7400: L-arginine biosynthesis IV (archaebacteria) -1.942100 0.0084078
ASPASN-PWY: superpathway of L-aspartate and L-asparagine biosynthesis -1.964838 0.0084397
FUCCAT-PWY: fucose degradation -1.968721 0.0085177
PWY-5690: TCA cycle II (plants and fungi) -1.999274 0.0020765
PWY-5345: superpathway of L-methionine biosynthesis (by sulfhydrylation) -2.008031 0.0057161
PWY-5667: CDP-diacylglycerol biosynthesis I -2.010024 0.0034229
SO4ASSIM-PWY: sulfate reduction I (assimilatory) -2.026512 0.0067341
PWY-7007: methyl ketone biosynthesis -2.037466 0.0026320
PWY-6630: superpathway of L-tyrosine biosynthesis -2.047251 0.0005294
PWY-6590: superpathway of Clostridium acetobutylicum acidogenic fermentation -2.052614 0.0074876
TRNA-CHARGING-PWY: tRNA charging -2.052633 0.0086581
PWY0-42: 2-methylcitrate cycle I -2.058113 0.0033138
PWY0-862: (5Z)-dodec-5-enoate biosynthesis -2.069680 0.0045255
PWY-7392: taxadiene biosynthesis (engineered) -2.154615 0.0030503
PWY-5121: superpathway of geranylgeranyl diphosphate biosynthesis II (via MEP) -2.158286 0.0027898
DENITRIFICATION-PWY: nitrate reduction I (denitrification) -2.163812 0.0020431
SULFATE-CYS-PWY: superpathway of sulfate assimilation and cysteine biosynthesis -2.196655 0.0042274
PWY-5747: 2-methylcitrate cycle II -2.206359 0.0024559
PWY-5188: tetrapyrrole biosynthesis I (from glutamate) -2.244322 0.0025818
SER-GLYSYN-PWY: superpathway of L-serine and glycine biosynthesis I -2.259807 0.0022405
PWY-7664: oleate biosynthesis IV (anaerobic) -2.265805 0.0024839
PWY-5863: superpathway of phylloquinol biosynthesis -2.267122 0.0011358
PWY-7560: methylerythritol phosphate pathway II -2.271288 0.0019152
ANAEROFRUCAT-PWY: homolactic fermentation -2.681518 0.0029751
PWY-6305: putrescine biosynthesis IV -2.692702 0.0029161
PWY-6549: L-glutamine biosynthesis III -2.697465 0.0006607
PWY-821: superpathway of sulfur amino acid biosynthesis (Saccharomyces cerevisiae) -2.714446 0.0013771
PWY-7663: gondoate biosynthesis (anaerobic) -2.724747 0.0007663
P108-PWY: pyruvate fermentation to propanoate I -2.742366 0.0014147
PWY-6891: thiazole biosynthesis II (Bacillus) -2.765335 0.0006256
GLYCOLYSIS-E-D: superpathway of glycolysis and Entner-Doudoroff -2.774376 0.0019819
PWY-5177: glutaryl-CoA degradation -2.776678 0.0088468
PWY-6897: thiamin salvage II -2.793742 0.0036289
RIBOSYN2-PWY: flavin biosynthesis I (bacteria and plants) -2.794366 0.0027831
HEMESYN2-PWY: heme biosynthesis II (anaerobic) -2.807997 0.0001720
TCA: TCA cycle I (prokaryotic) -2.819134 0.0045786
PWY-5484: glycolysis II (from fructose 6-phosphate) -2.824266 0.0022239
PWY-7003: glycerol degradation to butanol -2.827942 0.0088582
NONMEVIPP-PWY: methylerythritol phosphate pathway I -2.830198 0.0018007
PWY-5920: superpathway of heme biosynthesis from glycine -2.850742 0.0003789
PWY66-389: phytol degradation -2.857682 0.0020576
PWY-7371: 1,4-dihydroxy-6-naphthoate biosynthesis II -2.884515 0.0029288
PWY-5840: superpathway of menaquinol-7 biosynthesis -2.893458 0.0042677
PWY-6628: superpathway of L-phenylalanine biosynthesis -2.905931 0.0023358
HEME-BIOSYNTHESIS-II: heme biosynthesis I (aerobic) -2.955786 0.0010630
HISDEG-PWY: L-histidine degradation I -2.959526 0.0057620
PWY-5104: L-isoleucine biosynthesis IV -2.983969 0.0072339
PWY-5088: L-glutamate degradation VIII (to propanoate) -3.049735 0.0034401
PWY-6892: thiazole biosynthesis I (E. coli) -3.057563 0.0006771
PWY0-1415: superpathway of heme biosynthesis from uroporphyrinogen-III -3.064061 0.0016315
PANTO-PWY: phosphopantothenate biosynthesis I -3.088549 0.0019909
PWY-7254: TCA cycle VII (acetate-producers) -3.091493 0.0019959
PWY-6263: superpathway of menaquinol-8 biosynthesis II -3.136512 0.0009392
PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I -3.136571 0.0017682
REDCITCYC: TCA cycle VIII (helicobacter) -3.177954 0.0008579
PYRIDNUCSYN-PWY: NAD biosynthesis I (from aspartate) -3.304996 0.0009613
PYRIDNUCSAL-PWY: NAD salvage pathway I -3.338072 0.0001963
PWY-241: C4 photosynthetic carbon assimilation cycle, NADP-ME type -3.356106 0.0030459
PWY-6545: pyrimidine deoxyribonucleotides de novo biosynthesis III -3.434542 0.0015424
PWY-7389: superpathway of anaerobic energy metabolism (invertebrates) -3.497806 0.0008188
PWY-5101: L-isoleucine biosynthesis II -3.534535 0.0037582
PWY4LZ-257: superpathway of fermentation (Chlamydomonas reinhardtii) -3.576991 0.0009879
P23-PWY: reductive TCA cycle I -3.688232 0.0007033
PWY-7384: anaerobic energy metabolism (invertebrates, mitochondrial) -3.881671 0.0019779
PWY-6803: phosphatidylcholine acyl editing -3.975240 0.0001431
PWY-7323: superpathway of GDP-mannose-derived O-antigen building blocks biosynthesis -4.007135 0.0018467
P42-PWY: incomplete reductive TCA cycle -4.266303 0.0005118
THISYN-PWY: superpathway of thiamin diphosphate biosynthesis I -4.416848 0.0005514
COLANSYN-PWY: colanic acid building blocks biosynthesis -4.530580 0.0015287
PWY-6895: superpathway of thiamin diphosphate biosynthesis II -4.705532 0.0000302
PWY-5941: glycogen degradation II (eukaryotic) -4.758591 0.0033992
PWY-5030: L-histidine degradation III -4.958765 0.0019046
PWY-6992: 1,5-anhydrofructose degradation -5.695308 0.0080382

Saliva vs saliva_euk

Negative fold changes are higher in saliva samples while positive are higher in saliva_euk samples.

ss = pathways.loc[:, ['Saliva_Saliva_euk_FC', 'Saliva_Saliva_euk_p']]
ss = ss[ss['Saliva_Saliva_euk_p'] < 0.01]
ss = ss.sort_values(by=['Saliva_Saliva_euk_FC'], ascending=False).rename(columns={'Saliva_Saliva_euk_FC':'Log2 fold change', 'Saliva_Saliva_euk_p':'p'})
ss = pd.concat([ss.head(n=50), ss.tail(n=50)])
ss = ss.groupby(by=ss.index, axis=0).mean()
ss.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$ss %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
URSIN-PWY: ureide biosynthesis -1.125074 0.0005477
PWY-6309: L-tryptophan degradation XI (mammalian, via kynurenine) -1.153830 0.0080835
FUC-RHAMCAT-PWY: superpathway of fucose and rhamnose degradation -1.479367 0.0034393
PWY-5392: reductive TCA cycle II -1.593244 0.0050213
PWY-6344: L-ornithine degradation II (Stickland reaction) -1.650884 0.0001819
PWY-622: starch biosynthesis -1.830364 0.0068348
PWY-5067: glycogen biosynthesis II (from UDP-D-Glucose) -1.901485 0.0060668
TCA-GLYOX-BYPASS: superpathway of glyoxylate bypass and TCA -1.950438 0.0061700
GLYCOLYSIS-TCA-GLYOX-BYPASS: superpathway of glycolysis, pyruvate dehydrogenase, TCA, and glyoxylate bypass -2.017885 0.0079801
P125-PWY: superpathway of (R,R)-butanediol biosynthesis -2.029921 0.0061998
PWY-6901: superpathway of glucose and xylose degradation -2.336738 0.0083937
DENITRIFICATION-PWY: nitrate reduction I (denitrification) -2.410953 0.0030787
PWY-5384: sucrose degradation IV (sucrose phosphorylase) -2.497972 0.0063036
PYRIDNUCSAL-PWY: NAD salvage pathway I -3.338072 0.0001660

Buccal vs buccal_euk

Negative fold changes are higher in buccal samples while positive are higher in buccal_euk samples.

bbe = pathways.loc[:, ['Buccal_Buccal_euk_FC', 'Buccal_Buccal_euk_p']]
bbe = bbe[bbe['Buccal_Buccal_euk_p'] < 0.01]
bbe = bbe.sort_values(by=['Buccal_Buccal_euk_FC'], ascending=False).rename(columns={'Buccal_Buccal_euk_FC':'Log2 fold change', 'Buccal_Buccal_euk_p':'p'})
bbe = pd.concat([bbe.head(n=50), bbe.tail(n=50)])
bbe = bbe.groupby(by=bbe.index, axis=0).mean()
bbe.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bbe %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
PWY-7007: methyl ketone biosynthesis 1.424266 0.0000012
SO4ASSIM-PWY: sulfate reduction I (assimilatory) -1.614670 0.0004649
PWY-6969: TCA cycle V (2-oxoglutarate:ferredoxin oxidoreductase) -1.750007 0.0070212
PWY-5840: superpathway of menaquinol-7 biosynthesis -1.808867 0.0031228
PWY-7196: superpathway of pyrimidine ribonucleosides salvage -1.853108 0.0030499
P108-PWY: pyruvate fermentation to propanoate I -1.853532 0.0010202
HISDEG-PWY: L-histidine degradation I -1.919521 0.0045971
PWY-5920: superpathway of heme biosynthesis from glycine -2.061224 0.0000282
GLYCOCAT-PWY: glycogen degradation I (bacterial) -2.074745 0.0003171
PWY-7279: aerobic respiration II (cytochrome c) (yeast) -2.121906 0.0059900
PWY-6901: superpathway of glucose and xylose degradation -2.187600 0.0019796
SULFATE-CYS-PWY: superpathway of sulfate assimilation and cysteine biosynthesis -2.301408 0.0000380
NAGLIPASYN-PWY: lipid IVA biosynthesis -2.311088 0.0018788
PWY-7663: gondoate biosynthesis (anaerobic) -2.359405 0.0058709
PWY66-398: TCA cycle III (animals) -2.390681 0.0003488
PWY-6383: mono-trans, poly-cis decaprenyl phosphate biosynthesis -2.408960 0.0099503
PYRIDOXSYN-PWY: pyridoxal 5’-phosphate biosynthesis I -2.518405 0.0004261
PWY66-389: phytol degradation -2.557491 0.0008160
PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I -2.717392 0.0002948
PWY-5189: tetrapyrrole biosynthesis II (from glycine) -2.738729 0.0011017
PWY-5345: superpathway of L-methionine biosynthesis (by sulfhydrylation) -2.744253 0.0003297
PWY-7211: superpathway of pyrimidine deoxyribonucleotides de novo biosynthesis -2.752876 0.0000002
ARGORNPROST-PWY: arginine, ornithine and proline interconversion -2.811587 0.0012819
SER-GLYSYN-PWY: superpathway of L-serine and glycine biosynthesis I -2.816353 0.0000083
PWY-5863: superpathway of phylloquinol biosynthesis -2.856290 0.0049809
PWY-4981: L-proline biosynthesis II (from arginine) -2.896603 0.0003088
PWY-5464: superpathway of cytosolic glycolysis (plants), pyruvate dehydrogenase and TCA cycle -2.936548 0.0006690
PWY-5791: 1,4-dihydroxy-2-naphthoate biosynthesis II (plants) -2.942201 0.0032297
PWY-5837: 1,4-dihydroxy-2-naphthoate biosynthesis I -2.942201 0.0032297
THISYNARA-PWY: superpathway of thiamin diphosphate biosynthesis III (eukaryotes) -3.008688 0.0032675
PWY0-845: superpathway of pyridoxal 5’-phosphate biosynthesis and salvage -3.046322 0.0001500
PWY-5918: superpathay of heme biosynthesis from glutamate -3.146932 0.0001546
PWY-5989: stearate biosynthesis II (bacteria and plants) -3.274229 0.0001892
PWY-6168: flavin biosynthesis III (fungi) -3.317752 0.0010314
PENTOSE-P-PWY: pentose phosphate pathway -3.403425 0.0097015
PWY4FS-7: phosphatidylglycerol biosynthesis I (plastidic) -3.410684 0.0031237
HEMESYN2-PWY: heme biosynthesis II (anaerobic) -3.444899 0.0040657
PWY-6282: palmitoleate biosynthesis I (from (5Z)-dodec-5-enoate) -3.469113 0.0001367
PWY-5973: cis-vaccenate biosynthesis -3.491723 0.0001673
PWY-7282: 4-amino-2-methyl-5-phosphomethylpyrimidine biosynthesis (yeast) -3.525053 0.0000645
PWY-5899: superpathway of menaquinol-13 biosynthesis -3.544246 0.0028078
PWY-5897: superpathway of menaquinol-11 biosynthesis -3.544246 0.0028078
PWY-5898: superpathway of menaquinol-12 biosynthesis -3.544246 0.0028078
PWY-5121: superpathway of geranylgeranyl diphosphate biosynthesis II (via MEP) -3.550032 0.0063343
PWY-7392: taxadiene biosynthesis (engineered) -3.555892 0.0033284
FASYN-ELONG-PWY: fatty acid elongation – saturated -3.580566 0.0001118
NONMEVIPP-PWY: methylerythritol phosphate pathway I -3.655068 0.0026956
PWY4FS-8: phosphatidylglycerol biosynthesis II (non-plastidic) -3.677122 0.0014006
PWYG-321: mycolate biosynthesis -3.696486 0.0001244
DAPLYSINESYN-PWY: L-lysine biosynthesis I -3.741257 0.0000892
PWY-7664: oleate biosynthesis IV (anaerobic) -3.755008 0.0001399
PWY-5188: tetrapyrrole biosynthesis I (from glutamate) -3.781072 0.0043172
PWY0-781: aspartate superpathway -3.801118 0.0000014
PWY-5097: L-lysine biosynthesis VI -3.809828 0.0071639
ARGSYNBSUB-PWY: L-arginine biosynthesis II (acetyl cycle) -3.832655 0.0046513
RIBOSYN2-PWY: flavin biosynthesis I (bacteria and plants) -3.842746 0.0001349
PWY-7560: methylerythritol phosphate pathway II -3.843193 0.0003818
PWY-5667: CDP-diacylglycerol biosynthesis I -3.859761 0.0006152
PHOSLIPSYN-PWY: superpathway of phospholipid biosynthesis I (bacteria) -3.879878 0.0051632
PWY-6270: isoprene biosynthesis I -3.946063 0.0005631
GLYCOGENSYNTH-PWY: glycogen biosynthesis I (from ADP-D-Glucose) -3.961867 0.0022039
PWY0-1586: peptidoglycan maturation (meso-diaminopimelate containing) -4.030221 0.0049877
PWY-7388: octanoyl-[acyl-carrier protein] biosynthesis (mitochondria, yeast) -4.149346 0.0023237
P4-PWY: superpathway of L-lysine, L-threonine and L-methionine biosynthesis I -4.555713 0.0005788

Blood vs saliva_euk

Negative fold changes are higher in blood samples while positive are higher in saliva_euk samples.

blbe = pathways.loc[:, ['Blood_Saliva_euk_FC', 'Blood_Saliva_euk_p']]
blbe = blbe[blbe['Blood_Saliva_euk_p'] < 0.01]
blbe = blbe.sort_values(by=['Blood_Saliva_euk_FC'], ascending=False).rename(columns={'Blood_Saliva_euk_FC':'Log2 fold change', 'Blood_Saliva_euk_p':'p'})
blbe = pd.concat([blbe.head(n=50), blbe.tail(n=50)])
blbe = blbe.groupby(by=blbe.index, axis=0).mean()
blbe.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$blbe %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
PWY-7219: adenosine ribonucleotides de novo biosynthesis 6.1269895 0.0000746
PWY-7229: superpathway of adenosine nucleotides de novo biosynthesis I 5.9557212 0.0000960
PWY-6700: queuosine biosynthesis 5.8952182 0.0001747
PWY-3841: folate transformations II 5.8250691 0.0001884
PWY-1042: glycolysis IV (plant cytosol) 5.8248625 0.0006432
PWY-6126: superpathway of adenosine nucleotides de novo biosynthesis II 5.8126818 0.0001099
PWY-6386: UDP-N-acetylmuramoyl-pentapeptide biosynthesis II (lysine-containing) 5.7677714 0.0011508
1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis 5.7645176 0.0003939
PEPTIDOGLYCANSYN-PWY: peptidoglycan biosynthesis I (meso-diaminopimelate containing) 5.7581975 0.0006188
PWY-6387: UDP-N-acetylmuramoyl-pentapeptide biosynthesis I (meso-diaminopimelate containing) 5.7565472 0.0004957
PWY-7221: guanosine ribonucleotides de novo biosynthesis 5.7561276 0.0001316
PWY-6147: 6-hydroxymethyl-dihydropterin diphosphate biosynthesis I 5.7542654 0.0004848
PWY-6151: S-adenosyl-L-methionine cycle I 5.7277924 0.0003969
PWY0-166: superpathway of pyrimidine deoxyribonucleotides de novo biosynthesis (E. coli) 5.7020234 0.0001720
PWY-6277: superpathway of 5-aminoimidazole ribonucleotide biosynthesis 5.6901329 0.0005396
PWY-6122: 5-aminoimidazole ribonucleotide biosynthesis II 5.6901329 0.0005396
ANAGLYCOLYSIS-PWY: glycolysis III (from glucose) 5.6882823 0.0004411
PWY-5695: urate biosynthesis/inosine 5’-phosphate degradation 5.6795194 0.0003990
PWY-7184: pyrimidine deoxyribonucleotides de novo biosynthesis I 5.6568938 0.0001681
PWY-7228: superpathway of guanosine nucleotides de novo biosynthesis I 5.6551977 0.0001406
PWY-6121: 5-aminoimidazole ribonucleotide biosynthesis I 5.6263061 0.0008668
PWY66-400: glycolysis VI (metazoan) 5.6234895 0.0005515
GLYCOLYSIS: glycolysis I (from glucose 6-phosphate) 5.6136348 0.0003154
PWY-7220: adenosine deoxyribonucleotides de novo biosynthesis II 5.6119986 0.0001709
PWY-7222: guanosine deoxyribonucleotides de novo biosynthesis II 5.6119986 0.0001709
PWY-6385: peptidoglycan biosynthesis III (mycobacteria) 5.6106221 0.0012744
PWY-5686: UMP biosynthesis 5.6030538 0.0003824
PWY-841: superpathway of purine nucleotides de novo biosynthesis I 5.5904864 0.0004242
PWY-6125: superpathway of guanosine nucleotides de novo biosynthesis II 5.5843868 0.0001492
PWY-2942: L-lysine biosynthesis III 5.5585510 0.0003132
COA-PWY-1: coenzyme A biosynthesis II (mammalian) 5.5463333 0.0010274
PWY-7539: 6-hydroxymethyl-dihydropterin diphosphate biosynthesis III (Chlamydia) 5.5353641 0.0007482
NONMEVIPP-PWY: methylerythritol phosphate pathway I 5.5236915 0.0018069
DENOVOPURINE2-PWY: superpathway of purine nucleotides de novo biosynthesis II 5.5117418 0.0004067
PWY-7187: pyrimidine deoxyribonucleotides de novo biosynthesis II 5.5071869 0.0003277
TRNA-CHARGING-PWY: tRNA charging 5.4954667 0.0004910
ILEUSYN-PWY: L-isoleucine biosynthesis I (from threonine) 5.4938084 0.0000965
VALSYN-PWY: L-valine biosynthesis 5.4938084 0.0000965
PWY-5097: L-lysine biosynthesis VI 5.4857195 0.0003747
PWY-5100: pyruvate fermentation to acetate and lactate II 5.4746135 0.0002767
PWY-7208: superpathway of pyrimidine nucleobases salvage 5.4565280 0.0002225
PWY-5484: glycolysis II (from fructose 6-phosphate) 5.4425775 0.0003163
PWY-7111: pyruvate fermentation to isobutanol (engineered) 5.4019701 0.0000882
BRANCHED-CHAIN-AA-SYN-PWY: superpathway of branched amino acid biosynthesis 5.3840107 0.0002105
CALVIN-PWY: Calvin-Benson-Bassham cycle 5.3717316 0.0002489
ANAEROFRUCAT-PWY: homolactic fermentation 5.3672958 0.0005483
PWY-5103: L-isoleucine biosynthesis III 5.3424743 0.0002371
PWY-6168: flavin biosynthesis III (fungi) 5.3365914 0.0021062
DTDPRHAMSYN-PWY: dTDP-L-rhamnose biosynthesis I 5.3168256 0.0007737
PWY-6123: inosine-5’-phosphate biosynthesis I 5.2887943 0.0002369
PWY66-422: D-galactose degradation V (Leloir pathway) 4.2985076 0.0019566
PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I 4.2677020 0.0037708
P4-PWY: superpathway of L-lysine, L-threonine and L-methionine biosynthesis I 4.2672481 0.0007192
PWY-5121: superpathway of geranylgeranyl diphosphate biosynthesis II (via MEP) 4.2220106 0.0013731
PWY-6897: thiamin salvage II 4.1832183 0.0071123
PWY4FS-7: phosphatidylglycerol biosynthesis I (plastidic) 4.1814961 0.0026984
PWY4FS-8: phosphatidylglycerol biosynthesis II (non-plastidic) 4.1421538 0.0026989
PWY66-389: phytol degradation 4.1180691 0.0003474
PWY-6317: galactose degradation I (Leloir pathway) 4.1029088 0.0029584
HISTSYN-PWY: L-histidine biosynthesis 4.0927985 0.0007152
HEME-BIOSYNTHESIS-II: heme biosynthesis I (aerobic) 4.0692858 0.0005075
PWY-7282: 4-amino-2-methyl-5-phosphomethylpyrimidine biosynthesis (yeast) 4.0662574 0.0004197
GLYCOGENSYNTH-PWY: glycogen biosynthesis I (from ADP-D-Glucose) 4.0442863 0.0020580
P441-PWY: superpathway of N-acetylneuraminate degradation 4.0423428 0.0013078
PENTOSE-P-PWY: pentose phosphate pathway 4.0263385 0.0004375
PWY0-1261: anhydromuropeptides recycling 4.0238160 0.0012984
PWY-5918: superpathay of heme biosynthesis from glutamate 3.9569128 0.0012311
PWY0-781: aspartate superpathway 3.9424335 0.0008994
PWY-7117: C4 photosynthetic carbon assimilation cycle, PEPCK type 3.9019110 0.0013498
ARGSYNBSUB-PWY: L-arginine biosynthesis II (acetyl cycle) 3.8135193 0.0007451
PWY-7383: anaerobic energy metabolism (invertebrates, cytosol) 3.7855839 0.0018684
PWY0-1297: superpathway of purine deoxyribonucleosides degradation 3.7095790 0.0002252
PWY0-845: superpathway of pyridoxal 5’-phosphate biosynthesis and salvage 3.6582842 0.0013928
TCA: TCA cycle I (prokaryotic) 3.6566818 0.0008608
FERMENTATION-PWY: mixed acid fermentation 3.6134686 0.0013447
HEXITOLDEGSUPER-PWY: superpathway of hexitol degradation (bacteria) 3.5642268 0.0011384
PWY0-1298: superpathway of pyrimidine deoxyribonucleosides degradation 3.5336018 0.0006256
PWY-6471: peptidoglycan biosynthesis IV (Enterococcus faecium) 3.5015074 0.0026748
PWY-7279: aerobic respiration II (cytochrome c) (yeast) 3.4426575 0.0000269
PYRIDOXSYN-PWY: pyridoxal 5’-phosphate biosynthesis I 3.4231248 0.0043206
GLYCOLYSIS-E-D: superpathway of glycolysis and Entner-Doudoroff 3.4177244 0.0006304
PWY-5913: TCA cycle VI (obligate autotrophs) 3.4130841 0.0026517
P161-PWY: acetylene degradation 3.3375681 0.0003007
PWY-4981: L-proline biosynthesis II (from arginine) 3.3333144 0.0024192
GLCMANNANAUT-PWY: superpathway of N-acetylglucosamine, N-acetylmannosamine and N-acetylneuraminate degradation 3.2463208 0.0086455
PWY0-1415: superpathway of heme biosynthesis from uroporphyrinogen-III 2.9864453 0.0012454
PWY-5690: TCA cycle II (plants and fungi) 2.9768770 0.0001744
P122-PWY: heterolactic fermentation 2.8025947 0.0035444
ARGORNPROST-PWY: arginine, ornithine and proline interconversion 2.8017007 0.0048207
PWY-5345: superpathway of L-methionine biosynthesis (by sulfhydrylation) 2.7348747 0.0024793
PWY-5173: superpathway of acetyl-CoA biosynthesis 2.7070859 0.0011152
PWY-6803: phosphatidylcholine acyl editing 2.6808764 0.0003176
SULFATE-CYS-PWY: superpathway of sulfate assimilation and cysteine biosynthesis 2.5335437 0.0035800
PPGPPMET-PWY: ppGpp biosynthesis 2.4945955 0.0059352
PWY0-1479: tRNA processing 2.4669272 0.0017515
PWY-5675: nitrate reduction V (assimilatory) 2.3503180 0.0002165
PWY-7115: C4 photosynthetic carbon assimilation cycle, NAD-ME type 2.3283740 0.0027060
PWY0-1061: superpathway of L-alanine biosynthesis 2.2840671 0.0051978
PWY-4041: &gamma;-glutamyl cycle 1.9487025 0.0087551
PWY-5067: glycogen biosynthesis II (from UDP-D-Glucose) -0.3234633 0.0088135

Blood vs buccal_euk

Negative fold changes are higher in blood samples while positive are higher in buccal_euk samples.

blse = pathways.loc[:, ['Blood_Buccal_euk_FC', 'Blood_Buccal_euk_p']]
blse = blse[blse['Blood_Buccal_euk_p'] < 0.01]
blse = blse.sort_values(by=['Blood_Buccal_euk_FC'], ascending=False).rename(columns={'Blood_Buccal_euk_FC':'Log2 fold change', 'Blood_Buccal_euk_p':'p'})
blse = pd.concat([blse.head(n=50), blse.tail(n=50)])
blse = blse.groupby(by=blse.index, axis=0).mean()
blse.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$blse %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
PWY-7219: adenosine ribonucleotides de novo biosynthesis 4.082921 0.0000073
PWY-7220: adenosine deoxyribonucleotides de novo biosynthesis II 3.944994 0.0000159
PWY-7222: guanosine deoxyribonucleotides de novo biosynthesis II 3.944994 0.0000159
PWY-7228: superpathway of guanosine nucleotides de novo biosynthesis I 3.490060 0.0001084
PWY-6125: superpathway of guanosine nucleotides de novo biosynthesis II 3.447454 0.0001879
PWY-7221: guanosine ribonucleotides de novo biosynthesis 3.315561 0.0002034
VALSYN-PWY: L-valine biosynthesis 3.297722 0.0000880
ILEUSYN-PWY: L-isoleucine biosynthesis I (from threonine) 3.297722 0.0000880
PWY-6121: 5-aminoimidazole ribonucleotide biosynthesis I 3.107286 0.0077383
PWY-6122: 5-aminoimidazole ribonucleotide biosynthesis II 3.100352 0.0051319
PWY-6277: superpathway of 5-aminoimidazole ribonucleotide biosynthesis 3.100352 0.0051319
PWY-6609: adenine and adenosine salvage III 3.068784 0.0002464
PWY66-400: glycolysis VI (metazoan) 2.818712 0.0003861
UDPNAGSYN-PWY: UDP-N-acetyl-D-glucosamine biosynthesis I 2.659206 0.0045700
PWY-6124: inosine-5’-phosphate biosynthesis II 2.523542 0.0044726
GLYCOLYSIS: glycolysis I (from glucose 6-phosphate) 2.276063 0.0021645
PWY-5484: glycolysis II (from fructose 6-phosphate) 1.884414 0.0032187
SER-GLYSYN-PWY: superpathway of L-serine and glycine biosynthesis I 1.049353 0.0000346
PWY-3781: aerobic respiration I (cytochrome c) -3.487774 0.0023437

HUMAnN3

pathways = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann3/pathways.csv', header=0, index_col=0)

Blood vs saliva

Negative fold changes are higher in blood samples while positive are higher in buccal_euk samples.

bs3 = pathways.loc[:, ['Saliva_Blood_FC', 'Saliva_Blood_p']]
bs3 = bs3[bs3['Saliva_Blood_p'] < 0.01]
bs3 = bs3.sort_values(by=['Saliva_Blood_FC'], ascending=False).rename(columns={'Saliva_Blood_FC':'Log2 fold change', 'Saliva_Blood_p':'p'})
bs3 = pd.concat([bs3.head(n=50), bs3.tail(n=50)])
bs3 = bs3.groupby(by=bs3.index, axis=0).mean()
bs3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bs3 %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
PWY-5304: superpathway of sulfur oxidation (Acidianus ambivalens) 1.369967 0.0015278
PWY66-201: nicotine degradation IV -1.339120 0.0066475
PWY-6823: molybdenum cofactor biosynthesis -1.550877 0.0000889
PWY-6595: superpathway of guanosine nucleotides degradation (plants) -1.576811 0.0005615
PWY-7031: protein N-glycosylation (bacterial) -1.601721 0.0066482
GLUCUROCAT-PWY: superpathway of &beta;-D-glucuronide and D-glucuronate degradation -1.698105 0.0057003
KETOGLUCONMET-PWY: ketogluconate metabolism -1.776304 0.0004196
PWY-6891: thiazole biosynthesis II (Bacillus) -1.820555 0.0097794
PWY-6749: CMP-legionaminate biosynthesis I -1.882757 0.0009012
PWY-7242: D-fructuronate degradation -2.076551 0.0021045
PWY-6507: 4-deoxy-L-threo-hex-4-enopyranuronate degradation -2.118720 0.0024219
PWY-6992: 1,5-anhydrofructose degradation -2.156391 0.0001124
PWY-6748: nitrate reduction VII (denitrification) -2.373015 0.0004265
PWY-5022: 4-aminobutanoate degradation V -2.375872 0.0040979
PWY-6708: ubiquinol-8 biosynthesis (prokaryotic) -2.475866 0.0011876
PWY-5857: ubiquinol-10 biosynthesis (prokaryotic) -2.475866 0.0011876
PWY-5856: ubiquinol-9 biosynthesis (prokaryotic) -2.475866 0.0011876
PWY-5855: ubiquinol-7 biosynthesis (prokaryotic) -2.475866 0.0011876
GOLPDLCAT-PWY: superpathway of glycerol degradation to 1,3-propanediol -2.543814 0.0007921
PWY-7210: pyrimidine deoxyribonucleotides biosynthesis from CTP -2.683044 0.0002874
PWY3O-355: stearate biosynthesis III (fungi) -2.702749 0.0007630
METHGLYUT-PWY: superpathway of methylglyoxal degradation -2.706670 0.0000037
PWY-5675: nitrate reduction V (assimilatory) -2.761991 0.0000679
PWY-5676: acetyl-CoA fermentation to butanoate II -2.807641 0.0000129
PWY-7371: 1,4-dihydroxy-6-naphthoate biosynthesis II -2.833581 0.0047793
P125-PWY: superpathway of (R,R)-butanediol biosynthesis -2.837213 0.0000549
P105-PWY: TCA cycle IV (2-oxoglutarate decarboxylase) -2.885472 0.0000033
DENITRIFICATION-PWY: nitrate reduction I (denitrification) -2.986749 0.0004611
PWY66-389: phytol degradation -2.988662 0.0000608
PWY-5994: palmitate biosynthesis I (animals and fungi) -3.106828 0.0012030
PWY-6892: thiazole biosynthesis I (E. coli) -3.190007 0.0001594
PWY-4702: phytate degradation I -3.223278 0.0006150
REDCITCYC: TCA cycle VIII (helicobacter) -3.234659 0.0001317
P161-PWY: acetylene degradation -3.239657 0.0000163
PWY-7254: TCA cycle VII (acetate-producers) -3.250783 0.0001497
PWY-5083: NAD/NADH phosphorylation and dephosphorylation -3.274292 0.0005809
ARGORNPROST-PWY: arginine, ornithine and proline interconversion -3.346238 0.0002088
PWY-5367: petroselinate biosynthesis -3.364139 0.0047718
PWY4LZ-257: superpathway of fermentation (Chlamydomonas reinhardtii) -3.382913 0.0000188
PWY-5723: Rubisco shunt -3.516555 0.0001887
PWY-7385: 1,3-propanediol biosynthesis (engineered) -3.546559 0.0001336
PWY-7003: glycerol degradation to butanol -3.558627 0.0002047
SO4ASSIM-PWY: sulfate reduction I (assimilatory) -3.603495 0.0021792
PWY-7456: mannan degradation -3.612446 0.0000491
PWY-6606: guanosine nucleotides degradation II -3.629806 0.0005633
PWY-922: mevalonate pathway I -3.678726 0.0000316
P122-PWY: heterolactic fermentation -3.710629 0.0000636
PWY-5384: sucrose degradation IV (sucrose phosphorylase) -3.749889 0.0001721
PWY-6588: pyruvate fermentation to acetone -3.750906 0.0030886
PWY-5265: peptidoglycan biosynthesis II (staphylococci) -3.817206 0.0002463
PWY0-1586: peptidoglycan maturation (meso-diaminopimelate containing) -7.066243 0.0000015
PWY-7187: pyrimidine deoxyribonucleotides de novo biosynthesis II -7.066540 0.0000008
PWY-3001: superpathway of L-isoleucine biosynthesis I -7.081882 0.0000011
PWY-6147: 6-hydroxymethyl-dihydropterin diphosphate biosynthesis I -7.096266 0.0000063
PWY-5973: cis-vaccenate biosynthesis -7.109034 0.0000015
PWY-5103: L-isoleucine biosynthesis III -7.117862 0.0000006
BRANCHED-CHAIN-AA-SYN-PWY: superpathway of branched amino acid biosynthesis -7.132368 0.0000006
PWY-7184: pyrimidine deoxyribonucleotides de novo biosynthesis I -7.139054 0.0000007
PWY-7220: adenosine deoxyribonucleotides de novo biosynthesis II -7.178015 0.0000006
PWY-7222: guanosine deoxyribonucleotides de novo biosynthesis II -7.178015 0.0000006
PWY-4242: pantothenate and coenzyme A biosynthesis III -7.188008 0.0000027
PWY-6125: superpathway of guanosine nucleotides de novo biosynthesis II -7.197799 0.0000007
PWY0-166: superpathway of pyrimidine deoxyribonucleotides de novo biosynthesis (E. coli) -7.222713 0.0000009
ARO-PWY: chorismate biosynthesis I -7.231900 0.0000016
CALVIN-PWY: Calvin-Benson-Bassham cycle -7.242959 0.0000031
COA-PWY: coenzyme A biosynthesis I -7.250466 0.0000027
COMPLETE-ARO-PWY: superpathway of aromatic amino acid biosynthesis -7.251726 0.0000013
PWY-6163: chorismate biosynthesis from 3-dehydroquinate -7.280855 0.0000021
DENOVOPURINE2-PWY: superpathway of purine nucleotides de novo biosynthesis II -7.282089 0.0000007
PWY-7228: superpathway of guanosine nucleotides de novo biosynthesis I -7.285223 0.0000008
PWY-6151: S-adenosyl-L-methionine cycle I -7.288070 0.0000005
VALSYN-PWY: L-valine biosynthesis -7.294140 0.0000004
ILEUSYN-PWY: L-isoleucine biosynthesis I (from threonine) -7.294140 0.0000004
ANAGLYCOLYSIS-PWY: glycolysis III (from glucose) -7.320729 0.0000011
PWY-6700: queuosine biosynthesis -7.322231 0.0000025
COA-PWY-1: coenzyme A biosynthesis II (mammalian) -7.335901 0.0000013
PWY-6124: inosine-5’-phosphate biosynthesis II -7.335926 0.0000015
PWY-841: superpathway of purine nucleotides de novo biosynthesis I -7.345385 0.0000009
PWY-6126: superpathway of adenosine nucleotides de novo biosynthesis II -7.381062 0.0000005
PWY-6609: adenine and adenosine salvage III -7.383676 0.0000011
PWY-1042: glycolysis IV (plant cytosol) -7.390269 0.0000024
PWY-6123: inosine-5’-phosphate biosynthesis I -7.391219 0.0000013
PWY-6121: 5-aminoimidazole ribonucleotide biosynthesis I -7.398170 0.0000013
1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis -7.398277 0.0000015
PWY-5695: urate biosynthesis/inosine 5’-phosphate degradation -7.436938 0.0000015
TRNA-CHARGING-PWY: tRNA charging -7.457226 0.0000019
PWY-5686: UMP biosynthesis -7.461998 0.0000017
PWY-7111: pyruvate fermentation to isobutanol (engineered) -7.474423 0.0000003
PWY-6277: superpathway of 5-aminoimidazole ribonucleotide biosynthesis -7.478766 0.0000007
PWY-6122: 5-aminoimidazole ribonucleotide biosynthesis II -7.478766 0.0000007
PWY-3841: folate transformations II -7.498662 0.0000022
PWY-6385: peptidoglycan biosynthesis III (mycobacteria) -7.510429 0.0000021
PWY-7229: superpathway of adenosine nucleotides de novo biosynthesis I -7.513373 0.0000006
PWY-6386: UDP-N-acetylmuramoyl-pentapeptide biosynthesis II (lysine-containing) -7.522371 0.0000017
PWY-6387: UDP-N-acetylmuramoyl-pentapeptide biosynthesis I (meso-diaminopimelate containing) -7.525967 0.0000018
PEPTIDOGLYCANSYN-PWY: peptidoglycan biosynthesis I (meso-diaminopimelate containing) -7.543174 0.0000020
PWY-7221: guanosine ribonucleotides de novo biosynthesis -7.577720 0.0000018
PWY-7219: adenosine ribonucleotides de novo biosynthesis -7.685244 0.0000015
UNINTEGRATED -18.392918 0.0000000
UNMAPPED -19.270640 0.0000000

Blood vs buccal

Negative fold changes are higher in buccal samples while positive are higher in blood samples.

bb3 = pathways.loc[:, ['Buccal_Blood_FC', 'Buccal_Blood_p']]
bb3 = bb3[bb3['Buccal_Blood_p'] < 0.01]
bb3 = bb3.sort_values(by=['Buccal_Blood_FC'], ascending=False).rename(columns={'Buccal_Blood_FC':'Log2 fold change', 'Buccal_Blood_p':'p'})
bb3 = pd.concat([bb3.head(n=50), bb3.tail(n=50)])
bb3 = bb3.groupby(by=bb3.index, axis=0).mean()
bb3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bb3 %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
GLUCOSE1PMETAB-PWY: glucose and glucose-1-phosphate degradation -1.953539 0.0012949
PWY-7279: aerobic respiration II (cytochrome c) (yeast) -2.071518 0.0000388
HEMESYN2-PWY: heme biosynthesis II (anaerobic) -2.375301 0.0004377
PWY-5384: sucrose degradation IV (sucrose phosphorylase) -2.491040 0.0019229
PPGPPMET-PWY: ppGpp biosynthesis -2.814633 0.0000632
PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I -2.855298 0.0018581
P461-PWY: hexitol fermentation to lactate, formate, ethanol and acetate -2.910877 0.0022583
PWY-4981: L-proline biosynthesis II (from arginine) -3.099472 0.0000642
PANTO-PWY: phosphopantothenate biosynthesis I -3.135412 0.0082527
PANTOSYN-PWY: pantothenate and coenzyme A biosynthesis I -3.202533 0.0052060
ARGSYNBSUB-PWY: L-arginine biosynthesis II (acetyl cycle) -3.317952 0.0004671
PWY-7383: anaerobic energy metabolism (invertebrates, cytosol) -3.350229 0.0000992
ARGSYN-PWY: L-arginine biosynthesis I (via L-ornithine) -3.399892 0.0011887
HEXITOLDEGSUPER-PWY: superpathway of hexitol degradation (bacteria) -3.577781 0.0022764
PWY-621: sucrose degradation III (sucrose invertase) -3.583786 0.0030284
POLYISOPRENSYN-PWY: polyisoprenoid biosynthesis (E. coli) -3.616207 0.0021288
PWY-7198: pyrimidine deoxyribonucleotides de novo biosynthesis IV -3.708284 0.0006673
P161-PWY: acetylene degradation -3.725179 0.0002659
PWY-3781: aerobic respiration I (cytochrome c) -3.786589 0.0000728
PWY-7196: superpathway of pyrimidine ribonucleosides salvage -3.812389 0.0016488
HEME-BIOSYNTHESIS-II: heme biosynthesis I (aerobic) -3.992538 0.0000650
THISYNARA-PWY: superpathway of thiamin diphosphate biosynthesis III (eukaryotes) -4.014917 0.0002446
PWY0-1297: superpathway of purine deoxyribonucleosides degradation -4.148703 0.0002220
PWY-5188: tetrapyrrole biosynthesis I (from glutamate) -4.198054 0.0000469
NAD-BIOSYNTHESIS-II: NAD salvage pathway II -4.322622 0.0003753
SER-GLYSYN-PWY: superpathway of L-serine and glycine biosynthesis I -4.347729 0.0005945
PWY-5659: GDP-mannose biosynthesis -4.412045 0.0003699
PWY-7117: C4 photosynthetic carbon assimilation cycle, PEPCK type -4.422857 0.0005355
PWY66-409: superpathway of purine nucleotide salvage -4.433302 0.0003692
PWY-6897: thiamin salvage II -4.438291 0.0018318
P185-PWY: formaldehyde assimilation III (dihydroxyacetone cycle) -4.440537 0.0015759
PWY-6168: flavin biosynthesis III (fungi) -4.483201 0.0004274
PWY-5989: stearate biosynthesis II (bacteria and plants) -4.567455 0.0018054
PENTOSE-P-PWY: pentose phosphate pathway -4.570613 0.0005848
PWY-5484: glycolysis II (from fructose 6-phosphate) -4.623735 0.0000903
PWY-5667: CDP-diacylglycerol biosynthesis I -4.660401 0.0000939
PWY-7388: octanoyl-[acyl-carrier protein] biosynthesis (mitochondria, yeast) -4.668495 0.0034495
FASYN-INITIAL-PWY: superpathway of fatty acid biosynthesis initiation (E. coli) -4.695887 0.0047357
ANAEROFRUCAT-PWY: homolactic fermentation -4.712941 0.0001589
P4-PWY: superpathway of L-lysine, L-threonine and L-methionine biosynthesis I -4.728063 0.0005894
PWY-7539: 6-hydroxymethyl-dihydropterin diphosphate biosynthesis III (Chlamydia) -4.732397 0.0006326
PWY-7357: thiamin formation from pyrithiamine and oxythiamine (yeast) -4.793122 0.0014301
PWY0-1298: superpathway of pyrimidine deoxyribonucleosides degradation -4.795474 0.0012426
DAPLYSINESYN-PWY: L-lysine biosynthesis I -4.817453 0.0000988
PWY-5097: L-lysine biosynthesis VI -4.903941 0.0001113
PWY0-862: (5Z)-dodec-5-enoate biosynthesis -4.916194 0.0010235
PWY-6282: palmitoleate biosynthesis I (from (5Z)-dodec-5-enoate) -4.924639 0.0010046
HOMOSER-METSYN-PWY: L-methionine biosynthesis I -5.004194 0.0009653
PWY0-162: superpathway of pyrimidine ribonucleotides de novo biosynthesis -5.006905 0.0009292
DTDPRHAMSYN-PWY: dTDP-L-rhamnose biosynthesis I -5.025458 0.0004047
PWY-3841: folate transformations II -5.772760 0.0002901
PEPTIDOGLYCANSYN-PWY: peptidoglycan biosynthesis I (meso-diaminopimelate containing) -5.777016 0.0002195
COA-PWY-1: coenzyme A biosynthesis II (mammalian) -5.784816 0.0001797
PWY-6385: peptidoglycan biosynthesis III (mycobacteria) -5.786133 0.0002306
PWY-6700: queuosine biosynthesis -5.788170 0.0003525
1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis -5.805661 0.0013852
THRESYN-PWY: superpathway of L-threonine biosynthesis -5.808601 0.0005263
PWY-6387: UDP-N-acetylmuramoyl-pentapeptide biosynthesis I (meso-diaminopimelate containing) -5.811435 0.0002288
PWY66-400: glycolysis VI (metazoan) -5.820233 0.0002662
ANAGLYCOLYSIS-PWY: glycolysis III (from glucose) -5.827153 0.0000650
COMPLETE-ARO-PWY: superpathway of aromatic amino acid biosynthesis -5.843970 0.0006464
PWY-7197: pyrimidine deoxyribonucleotide phosphorylation -5.861724 0.0021314
PWY-6386: UDP-N-acetylmuramoyl-pentapeptide biosynthesis II (lysine-containing) -5.864866 0.0003747
TRPSYN-PWY: L-tryptophan biosynthesis -5.887985 0.0005632
PWY0-1586: peptidoglycan maturation (meso-diaminopimelate containing) -5.916768 0.0005520
PWY-6737: starch degradation V -5.930443 0.0034960
PWY-3001: superpathway of L-isoleucine biosynthesis I -5.946788 0.0005302
PWY-6151: S-adenosyl-L-methionine cycle I -5.948886 0.0002898
PWY-6936: seleno-amino acid biosynthesis -5.970942 0.0000997
PWY-7199: pyrimidine deoxyribonucleosides salvage -6.001521 0.0051249
PWY-7187: pyrimidine deoxyribonucleotides de novo biosynthesis II -6.006174 0.0006348
PWY0-1296: purine ribonucleosides degradation -6.017901 0.0002293
PWY0-166: superpathway of pyrimidine deoxyribonucleotides de novo biosynthesis (E. coli) -6.040152 0.0006188
PWY-7184: pyrimidine deoxyribonucleotides de novo biosynthesis I -6.054700 0.0010350
PWY-7221: guanosine ribonucleotides de novo biosynthesis -6.076546 0.0000988
TRNA-CHARGING-PWY: tRNA charging -6.078022 0.0002090
BRANCHED-CHAIN-AA-SYN-PWY: superpathway of branched amino acid biosynthesis -6.103833 0.0006245
PWY-5103: L-isoleucine biosynthesis III -6.103833 0.0007868
PWY-7208: superpathway of pyrimidine nucleobases salvage -6.114675 0.0010667
PWY-7234: inosine-5’-phosphate biosynthesis III -6.126006 0.0005278
PWY-7228: superpathway of guanosine nucleotides de novo biosynthesis I -6.126660 0.0005909
PWY-6124: inosine-5’-phosphate biosynthesis II -6.127083 0.0002281
PWY-6121: 5-aminoimidazole ribonucleotide biosynthesis I -6.131049 0.0001478
PWY-6125: superpathway of guanosine nucleotides de novo biosynthesis II -6.140068 0.0004916
PWY-841: superpathway of purine nucleotides de novo biosynthesis I -6.150735 0.0003557
PWY-6123: inosine-5’-phosphate biosynthesis I -6.151446 0.0003128
PWY-6122: 5-aminoimidazole ribonucleotide biosynthesis II -6.151560 0.0001069
PWY-6277: superpathway of 5-aminoimidazole ribonucleotide biosynthesis -6.151560 0.0001069
DENOVOPURINE2-PWY: superpathway of purine nucleotides de novo biosynthesis II -6.166463 0.0003280
VALSYN-PWY: L-valine biosynthesis -6.271443 0.0000911
ILEUSYN-PWY: L-isoleucine biosynthesis I (from threonine) -6.271443 0.0000911
PWY-6126: superpathway of adenosine nucleotides de novo biosynthesis II -6.361994 0.0001041
PWY-7222: guanosine deoxyribonucleotides de novo biosynthesis II -6.379021 0.0000885
PWY-7220: adenosine deoxyribonucleotides de novo biosynthesis II -6.379021 0.0000885
PWY-7229: superpathway of adenosine nucleotides de novo biosynthesis I -6.439987 0.0000954
PWY-7219: adenosine ribonucleotides de novo biosynthesis -6.482576 0.0000908
PWY-7111: pyruvate fermentation to isobutanol (engineered) -6.483689 0.0000938
PWY-6609: adenine and adenosine salvage III -6.888554 0.0018737
UNINTEGRATED -17.960046 0.0000000
UNMAPPED -19.493154 0.0000000

Saliva vs buccal

Negative fold changes are higher in saliva samples while positive are higher in buccal samples.

sb3 = pathways.loc[:, ['Saliva_Buccal_FC', 'Saliva_Buccal_p']]
sb3 = sb3[sb3['Saliva_Buccal_p'] < 0.01]
sb3 = sb3.sort_values(by=['Saliva_Buccal_FC'], ascending=False).rename(columns={'Saliva_Buccal_FC':'Log2 fold change', 'Saliva_Buccal_p':'p'})
sb3 = pd.concat([sb3.head(n=50), sb3.tail(n=50)])
sb3 = sb3.groupby(by=sb3.index, axis=0).mean()
sb3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$sb3 %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
PWY-5304: superpathway of sulfur oxidation (Acidianus ambivalens) 1.369967 0.0016945
PWY66-201: nicotine degradation IV -1.339120 0.0066475
PWY-7031: protein N-glycosylation (bacterial) -1.601721 0.0066482
GLUCUROCAT-PWY: superpathway of &beta;-D-glucuronide and D-glucuronate degradation -1.698105 0.0057003
PWY-6749: CMP-legionaminate biosynthesis I -1.882757 0.0009012
PWY66-389: phytol degradation -2.000471 0.0082854
PWY-7383: anaerobic energy metabolism (invertebrates, cytosol) -2.073991 0.0015441
PWY-7242: D-fructuronate degradation -2.076551 0.0021045
PWY-5097: L-lysine biosynthesis VI -2.084571 0.0081699
PWY-6507: 4-deoxy-L-threo-hex-4-enopyranuronate degradation -2.118720 0.0024219
PWY-6992: 1,5-anhydrofructose degradation -2.156391 0.0001299
PWY-5484: glycolysis II (from fructose 6-phosphate) -2.195773 0.0058462
ANAEROFRUCAT-PWY: homolactic fermentation -2.206504 0.0074108
HEME-BIOSYNTHESIS-II: heme biosynthesis I (aerobic) -2.217211 0.0042655
PWY-7328: superpathway of UDP-glucose-derived O-antigen building blocks biosynthesis -2.230378 0.0077247
PWY-5667: CDP-diacylglycerol biosynthesis I -2.280912 0.0054802
PWY-6748: nitrate reduction VII (denitrification) -2.373015 0.0004265
PWY-5188: tetrapyrrole biosynthesis I (from glutamate) -2.397191 0.0038231
PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I -2.418632 0.0083616
PWY-3781: aerobic respiration I (cytochrome c) -2.426190 0.0041991
PWY-6708: ubiquinol-8 biosynthesis (prokaryotic) -2.475866 0.0012813
PWY-5857: ubiquinol-10 biosynthesis (prokaryotic) -2.475866 0.0012813
PWY-5856: ubiquinol-9 biosynthesis (prokaryotic) -2.475866 0.0012813
PWY-5855: ubiquinol-7 biosynthesis (prokaryotic) -2.475866 0.0012813
PWY-7198: pyrimidine deoxyribonucleotides de novo biosynthesis IV -2.559669 0.0079899
ARGSYNBSUB-PWY: L-arginine biosynthesis II (acetyl cycle) -2.598809 0.0084929
PWY-4981: L-proline biosynthesis II (from arginine) -2.601973 0.0016975
PWY0-1415: superpathway of heme biosynthesis from uroporphyrinogen-III -2.615203 0.0064639
PWY0-1241: ADP-L-glycero-&beta;-D-manno-heptose biosynthesis -2.700916 0.0085394
PWY3O-355: stearate biosynthesis III (fungi) -2.702749 0.0020314
HEMESYN2-PWY: heme biosynthesis II (anaerobic) -2.724787 0.0091285
REDCITCYC: TCA cycle VIII (helicobacter) -2.758828 0.0039355
PWY-7254: TCA cycle VII (acetate-producers) -2.760869 0.0052888
PWY-7371: 1,4-dihydroxy-6-naphthoate biosynthesis II -2.833581 0.0047793
P108-PWY: pyruvate fermentation to propanoate I -2.846079 0.0028413
P105-PWY: TCA cycle IV (2-oxoglutarate decarboxylase) -2.885472 0.0003474
PWY-2723: trehalose degradation V -2.931790 0.0084902
PWY-7279: aerobic respiration II (cytochrome c) (yeast) -2.942390 0.0000997
DENITRIFICATION-PWY: nitrate reduction I (denitrification) -2.986749 0.0032300
PWY-5994: palmitate biosynthesis I (animals and fungi) -3.106828 0.0012030
PPGPPMET-PWY: ppGpp biosynthesis -3.146512 0.0012225
PWY-6892: thiazole biosynthesis I (E. coli) -3.190007 0.0037629
PWY-4702: phytate degradation I -3.223278 0.0006150
PWY-6969: TCA cycle V (2-oxoglutarate:ferredoxin oxidoreductase) -3.235096 0.0046632
ARGORNPROST-PWY: arginine, ornithine and proline interconversion -3.346238 0.0002088
PWY-5690: TCA cycle II (plants and fungi) -3.370347 0.0059353
PANTO-PWY: phosphopantothenate biosynthesis I -3.511841 0.0058816
PWY-7385: 1,3-propanediol biosynthesis (engineered) -3.546559 0.0003270
PWY-7456: mannan degradation -3.612446 0.0000491
PANTOSYN-PWY: pantothenate and coenzyme A biosynthesis I -3.619359 0.0063882
PWY-6803: phosphatidylcholine acyl editing -3.633404 0.0008398
GLUCONEO-PWY: gluconeogenesis I -3.646638 0.0079964
COBALSYN-PWY: adenosylcobalamin salvage from cobinamide I -3.673371 0.0053391
CITRULBIO-PWY: L-citrulline biosynthesis -3.737103 0.0051830
NAGLIPASYN-PWY: lipid IVA biosynthesis -3.823621 0.0055214
PWY-5863: superpathway of phylloquinol biosynthesis -3.910849 0.0071380
PWY-5189: tetrapyrrole biosynthesis II (from glycine) -3.915771 0.0038738
PWY-6263: superpathway of menaquinol-8 biosynthesis II -3.919003 0.0005096
PWY-5104: L-isoleucine biosynthesis IV -3.970444 0.0050378
PWY-6901: superpathway of glucose and xylose degradation -4.000096 0.0000287
P164-PWY: purine nucleobases degradation I (anaerobic) -4.051673 0.0052106
POLYAMSYN-PWY: superpathway of polyamine biosynthesis I -4.099309 0.0000917
HISDEG-PWY: L-histidine degradation I -4.126351 0.0039741
PWY-5920: superpathway of heme biosynthesis from glycine -4.135103 0.0012035
PYRIDNUCSYN-PWY: NAD biosynthesis I (from aspartate) -4.257678 0.0047888
PWY-4984: urea cycle -4.266707 0.0037367
PWY-7115: C4 photosynthetic carbon assimilation cycle, NAD-ME type -4.324324 0.0040105
PWY-622: starch biosynthesis -4.431540 0.0043576
P42-PWY: incomplete reductive TCA cycle -4.461541 0.0017764
P23-PWY: reductive TCA cycle I -4.472183 0.0001604
PWY-5030: L-histidine degradation III -4.472589 0.0070466
PWY-6305: putrescine biosynthesis IV -4.638622 0.0007453
ARG+POLYAMINE-SYN: superpathway of arginine and polyamine biosynthesis -4.767626 0.0000078
PWY0-881: superpathway of fatty acid biosynthesis I (E. coli) -4.795367 0.0017388
COLANSYN-PWY: colanic acid building blocks biosynthesis -4.879777 0.0003081
PWY-6545: pyrimidine deoxyribonucleotides de novo biosynthesis III -4.962008 0.0013138
GLYCOLYSIS-E-D: superpathway of glycolysis and Entner-Doudoroff -4.993373 0.0036655
THISYN-PWY: superpathway of thiamin diphosphate biosynthesis I -5.130474 0.0059325
PWY-241: C4 photosynthetic carbon assimilation cycle, NADP-ME type -5.381551 0.0006837
PWY-5840: superpathway of menaquinol-7 biosynthesis -5.806246 0.0039684
PWY-7200: superpathway of pyrimidine deoxyribonucleoside salvage -6.019793 0.0000074

Saliva vs saliva_euk

Negative fold changes are higher in saliva samples while positive are higher in saliva_euk samples.

ss3 = pathways.loc[:, ['Saliva_Saliva_euk_FC', 'Saliva_Saliva_euk_p']]
ss3 = ss3[ss3['Saliva_Saliva_euk_p'] < 0.01]
ss3 = ss3.sort_values(by=['Saliva_Saliva_euk_FC'], ascending=False).rename(columns={'Saliva_Saliva_euk_FC':'Log2 fold change', 'Saliva_Saliva_euk_p':'p'})
ss3 = pd.concat([ss3.head(n=50), ss3.tail(n=50)])
ss3 = ss3.groupby(by=ss3.index, axis=0).mean()
ss3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$ss3 %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
PWY-7279: aerobic respiration II (cytochrome c) (yeast) -1.370885 0.0087048
KETOGLUCONMET-PWY: ketogluconate metabolism -1.776304 0.0026801
PWY-5675: nitrate reduction V (assimilatory) -2.236857 0.0068771
P125-PWY: superpathway of (R,R)-butanediol biosynthesis -2.559305 0.0020286
METHGLYUT-PWY: superpathway of methylglyoxal degradation -2.706670 0.0094885
PWY-5676: acetyl-CoA fermentation to butanoate II -2.771931 0.0070687
P105-PWY: TCA cycle IV (2-oxoglutarate decarboxylase) -2.885472 0.0000033
PWY-6901: superpathway of glucose and xylose degradation -3.022929 0.0067578
PWY-5994: palmitate biosynthesis I (animals and fungi) -3.106828 0.0012030
PWY-5265: peptidoglycan biosynthesis II (staphylococci) -3.817206 0.0002463

Buccal vs buccal_euk

Negative fold changes are higher in buccal samples while positive are higher in buccal_euk samples.

bbe3 = pathways.loc[:, ['Buccal_Buccal_euk_FC', 'Buccal_Buccal_euk_p']]
bbe3 = bbe3[bbe3['Buccal_Buccal_euk_p'] < 0.01]
bbe3 = bbe3.sort_values(by=['Buccal_Buccal_euk_FC'], ascending=False).rename(columns={'Buccal_Buccal_euk_FC':'Log2 fold change', 'Buccal_Buccal_euk_p':'p'})
bbe3 = pd.concat([bbe3.head(n=50), bbe3.tail(n=50)])
bbe3 = bbe3.groupby(by=bbe3.index, axis=0).mean()
bbe3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bbe3 %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
PWY-7279: aerobic respiration II (cytochrome c) (yeast) -2.071518 0.0000388
HEMESYN2-PWY: heme biosynthesis II (anaerobic) -2.375301 0.0004377
PWY-5384: sucrose degradation IV (sucrose phosphorylase) -2.491040 0.0019229
PPGPPMET-PWY: ppGpp biosynthesis -2.814633 0.0000632
PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I -2.855298 0.0018581
PWY-4981: L-proline biosynthesis II (from arginine) -3.099472 0.0008894
ARGSYNBSUB-PWY: L-arginine biosynthesis II (acetyl cycle) -3.317952 0.0080073
PWY-7383: anaerobic energy metabolism (invertebrates, cytosol) -3.350229 0.0064698
POLYISOPRENSYN-PWY: polyisoprenoid biosynthesis (E. coli) -3.616207 0.0092780
PWY-7198: pyrimidine deoxyribonucleotides de novo biosynthesis IV -3.708284 0.0069478
PWY-3781: aerobic respiration I (cytochrome c) -3.786589 0.0084026
PWY-7196: superpathway of pyrimidine ribonucleosides salvage -3.812389 0.0016488
HEME-BIOSYNTHESIS-II: heme biosynthesis I (aerobic) -3.992538 0.0046983
SER-GLYSYN-PWY: superpathway of L-serine and glycine biosynthesis I -4.135579 0.0035870
PWY-5188: tetrapyrrole biosynthesis I (from glutamate) -4.198054 0.0008200
PWY-5097: L-lysine biosynthesis VI -4.226292 0.0065865
NAD-BIOSYNTHESIS-II: NAD salvage pathway II -4.322622 0.0003753
PWY-5659: GDP-mannose biosynthesis -4.412045 0.0003873
PWY-7117: C4 photosynthetic carbon assimilation cycle, PEPCK type -4.422857 0.0005355
PWY66-409: superpathway of purine nucleotide salvage -4.433302 0.0003692
P185-PWY: formaldehyde assimilation III (dihydroxyacetone cycle) -4.440537 0.0015759
PWY-6168: flavin biosynthesis III (fungi) -4.483201 0.0016972
PENTOSE-P-PWY: pentose phosphate pathway -4.570613 0.0005848
PWY-5667: CDP-diacylglycerol biosynthesis I -4.660401 0.0010937
PWY-2942: L-lysine biosynthesis III -4.698373 0.0044761
P4-PWY: superpathway of L-lysine, L-threonine and L-methionine biosynthesis I -4.728063 0.0005894
DAPLYSINESYN-PWY: L-lysine biosynthesis I -4.817453 0.0000988
PWY0-162: superpathway of pyrimidine ribonucleotides de novo biosynthesis -5.006905 0.0077868
PWY-5913: TCA cycle VI (obligate autotrophs) -5.533614 0.0031467

Blood vs saliva_euk

Negative fold changes are higher in blood samples while positive are higher in saliva_euk samples.

blbe3 = pathways.loc[:, ['Blood_Saliva_euk_FC', 'Blood_Saliva_euk_p']]
blbe3 = blbe3[blbe3['Blood_Saliva_euk_p'] < 0.01]
blbe3 = blbe3.sort_values(by=['Blood_Saliva_euk_FC'], ascending=False).rename(columns={'Blood_Saliva_euk_FC':'Log2 fold change', 'Blood_Saliva_euk_p':'p'})
blbe3 = pd.concat([blbe3.head(n=50), blbe3.tail(n=50)])
blbe3 = blbe3.groupby(by=blbe3.index, axis=0).mean()
blbe3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$blbe3 %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
UNMAPPED 19.493011 0.0000000
UNINTEGRATED 17.939282 0.0000000
PWY-7219: adenosine ribonucleotides de novo biosynthesis 6.110646 0.0007535
PWY-7229: superpathway of adenosine nucleotides de novo biosynthesis I 6.100552 0.0011918
PWY-7222: guanosine deoxyribonucleotides de novo biosynthesis II 6.095096 0.0016145
PWY-7220: adenosine deoxyribonucleotides de novo biosynthesis II 6.095096 0.0016145
PWY-6126: superpathway of adenosine nucleotides de novo biosynthesis II 5.979035 0.0012916
PWY-7221: guanosine ribonucleotides de novo biosynthesis 5.877347 0.0006632
PWY-7111: pyruvate fermentation to isobutanol (engineered) 5.814683 0.0001827
PEPTIDOGLYCANSYN-PWY: peptidoglycan biosynthesis I (meso-diaminopimelate containing) 5.791791 0.0025942
PWY-6387: UDP-N-acetylmuramoyl-pentapeptide biosynthesis I (meso-diaminopimelate containing) 5.773506 0.0018236
PWY-6386: UDP-N-acetylmuramoyl-pentapeptide biosynthesis II (lysine-containing) 5.757256 0.0024790
TRNA-CHARGING-PWY: tRNA charging 5.754887 0.0006745
PWY-6700: queuosine biosynthesis 5.734196 0.0005906
PWY-3841: folate transformations II 5.726477 0.0011950
PWY-6385: peptidoglycan biosynthesis III (mycobacteria) 5.714218 0.0022687
PWY-1042: glycolysis IV (plant cytosol) 5.701539 0.0023823
ILEUSYN-PWY: L-isoleucine biosynthesis I (from threonine) 5.671574 0.0004770
VALSYN-PWY: L-valine biosynthesis 5.671574 0.0004770
PWY-5686: UMP biosynthesis 5.646300 0.0020403
ANAGLYCOLYSIS-PWY: glycolysis III (from glucose) 5.633261 0.0016283
PWY-6151: S-adenosyl-L-methionine cycle I 5.624810 0.0000442
PWY-6147: 6-hydroxymethyl-dihydropterin diphosphate biosynthesis I 5.572692 0.0006148
PWY-6123: inosine-5’-phosphate biosynthesis I 5.570520 0.0014188
PWY66-400: glycolysis VI (metazoan) 5.548027 0.0016291
PWY0-166: superpathway of pyrimidine deoxyribonucleotides de novo biosynthesis (E. coli) 5.546212 0.0011415
PWY-6124: inosine-5’-phosphate biosynthesis II 5.509068 0.0011130
PWY-7228: superpathway of guanosine nucleotides de novo biosynthesis I 5.492656 0.0006684
PWY-6125: superpathway of guanosine nucleotides de novo biosynthesis II 5.492486 0.0008237
PWY-5973: cis-vaccenate biosynthesis 5.444498 0.0001009
GLYCOLYSIS: glycolysis I (from glucose 6-phosphate) 5.431052 0.0011567
COA-PWY-1: coenzyme A biosynthesis II (mammalian) 5.430403 0.0020649
BRANCHED-CHAIN-AA-SYN-PWY: superpathway of branched amino acid biosynthesis 5.419439 0.0012262
PWY-5103: L-isoleucine biosynthesis III 5.419439 0.0019216
PWY-2942: L-lysine biosynthesis III 5.392441 0.0019641
PWY-7663: gondoate biosynthesis (anaerobic) 5.381270 0.0000442
PWY-7184: pyrimidine deoxyribonucleotides de novo biosynthesis I 5.362463 0.0010197
COA-PWY: coenzyme A biosynthesis I 5.361069 0.0015540
CALVIN-PWY: Calvin-Benson-Bassham cycle 5.359116 0.0011099
PWY-5484: glycolysis II (from fructose 6-phosphate) 5.324635 0.0017712
PWY-5097: L-lysine biosynthesis VI 5.319435 0.0023547
PWY-4242: pantothenate and coenzyme A biosynthesis III 5.305762 0.0035591
PWY-3001: superpathway of L-isoleucine biosynthesis I 5.269120 0.0023617
GLUCONEO-PWY: gluconeogenesis I 5.202574 0.0006711
ANAEROFRUCAT-PWY: homolactic fermentation 5.142470 0.0018552
PWY-7208: superpathway of pyrimidine nucleobases salvage 5.137379 0.0034481
THRESYN-PWY: superpathway of L-threonine biosynthesis 5.117801 0.0032271
HSERMETANA-PWY: L-methionine biosynthesis III 5.061252 0.0030230
PANTOSYN-PWY: pantothenate and coenzyme A biosynthesis I 5.037823 0.0029191
PWY-6936: seleno-amino acid biosynthesis 5.015933 0.0002031
ASPASN-PWY: superpathway of L-aspartate and L-asparagine biosynthesis 5.004985 0.0008390
PWY-7199: pyrimidine deoxyribonucleosides salvage 4.999708 0.0057875
PANTO-PWY: phosphopantothenate biosynthesis I 4.998813 0.0045278
FASYN-ELONG-PWY: fatty acid elongation – saturated 4.897003 0.0000209
PWY-6737: starch degradation V 4.863308 0.0016062
NONOXIPENT-PWY: pentose phosphate pathway (non-oxidative branch) 4.859926 0.0007491
PWY-7197: pyrimidine deoxyribonucleotide phosphorylation 4.854472 0.0010486
SER-GLYSYN-PWY: superpathway of L-serine and glycine biosynthesis I 4.850115 0.0014276
PWY-7234: inosine-5’-phosphate biosynthesis III 4.834209 0.0012272
PWY-7664: oleate biosynthesis IV (anaerobic) 4.833572 0.0000237
PWY0-862: (5Z)-dodec-5-enoate biosynthesis 4.753398 0.0000225
PWY-6282: palmitoleate biosynthesis I (from (5Z)-dodec-5-enoate) 4.750186 0.0000219
PWY-3781: aerobic respiration I (cytochrome c) 4.741331 0.0000165
DAPLYSINESYN-PWY: L-lysine biosynthesis I 4.563070 0.0011296
PWY-5989: stearate biosynthesis II (bacteria and plants) 4.543679 0.0000382
PWY-7198: pyrimidine deoxyribonucleotides de novo biosynthesis IV 4.457328 0.0003134
PWY66-422: D-galactose degradation V (Leloir pathway) 4.362995 0.0019013
HOMOSER-METSYN-PWY: L-methionine biosynthesis I 4.328386 0.0003852
GLYCOGENSYNTH-PWY: glycogen biosynthesis I (from ADP-D-Glucose) 4.144839 0.0012492
PENTOSE-P-PWY: pentose phosphate pathway 4.018131 0.0014803
TCA: TCA cycle I (prokaryotic) 3.929039 0.0002437
PWY-6317: galactose degradation I (Leloir pathway) 3.887112 0.0075802
PWY0-1479: tRNA processing 3.874353 0.0007041
PWY-7117: C4 photosynthetic carbon assimilation cycle, PEPCK type 3.783430 0.0002497
PWY-7279: aerobic respiration II (cytochrome c) (yeast) 3.643022 0.0000058
PWY-5913: TCA cycle VI (obligate autotrophs) 3.351989 0.0006523
HEMESYN2-PWY: heme biosynthesis II (anaerobic) 3.208226 0.0003164
GLYCOLYSIS-E-D: superpathway of glycolysis and Entner-Doudoroff 3.185600 0.0011206
PWY0-1241: ADP-L-glycero-&beta;-D-manno-heptose biosynthesis 3.132300 0.0043060
PWY-5173: superpathway of acetyl-CoA biosynthesis 2.832510 0.0022145
PWY-6803: phosphatidylcholine acyl editing 2.676857 0.0000205
SO4ASSIM-PWY: sulfate reduction I (assimilatory) 2.247477 0.0014842
GLUCOSE1PMETAB-PWY: glucose and glucose-1-phosphate degradation 2.100950 0.0022532
PWY-5505: L-glutamate and L-glutamine biosynthesis 1.236371 0.0014493

Blood vs buccal_euk

Negative fold changes are higher in blood samples while positive are higher in buccal_euk samples.

blse3 = pathways.loc[:, ['Blood_Buccal_euk_FC', 'Blood_Buccal_euk_p']]
blse3 = blse3[blse3['Blood_Buccal_euk_p'] < 0.01]
blse3 = blse3.sort_values(by=['Blood_Buccal_euk_FC'], ascending=False).rename(columns={'Blood_Buccal_euk_FC':'Log2 fold change', 'Blood_Buccal_euk_p':'p'})
blse3 = pd.concat([blse3.head(n=50), blse.tail(n=50)])
blse3 = blse3.groupby(by=blse3.index, axis=0).mean()
blse3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$blse3 %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
UNMAPPED 19.525104 0.0000000
UNINTEGRATED 17.899060 0.0000000
PWY-7219: adenosine ribonucleotides de novo biosynthesis 4.082921 0.0000073
PWY-7222: guanosine deoxyribonucleotides de novo biosynthesis II 3.944994 0.0000159
PWY-7220: adenosine deoxyribonucleotides de novo biosynthesis II 3.944994 0.0000159
PWY-7228: superpathway of guanosine nucleotides de novo biosynthesis I 3.490060 0.0001084
PWY-6125: superpathway of guanosine nucleotides de novo biosynthesis II 3.447454 0.0001879
ILEUSYN-PWY: L-isoleucine biosynthesis I (from threonine) 3.297722 0.0000880
VALSYN-PWY: L-valine biosynthesis 3.297722 0.0000880
PWY-6121: 5-aminoimidazole ribonucleotide biosynthesis I 3.107286 0.0077383
PWY-6277: superpathway of 5-aminoimidazole ribonucleotide biosynthesis 3.100352 0.0051319
PWY-6122: 5-aminoimidazole ribonucleotide biosynthesis II 3.100352 0.0051319
PWY-7221: guanosine ribonucleotides de novo biosynthesis 3.096631 0.0028047
PWY-6609: adenine and adenosine salvage III 3.068784 0.0002464
PWY66-400: glycolysis VI (metazoan) 2.818712 0.0003861
UDPNAGSYN-PWY: UDP-N-acetyl-D-glucosamine biosynthesis I 2.659206 0.0045700
PWY-6151: S-adenosyl-L-methionine cycle I 2.537255 0.0016117
PWY-6124: inosine-5’-phosphate biosynthesis II 2.523542 0.0044726
GLYCOLYSIS: glycolysis I (from glucose 6-phosphate) 2.276063 0.0021645
PWY-5484: glycolysis II (from fructose 6-phosphate) 1.884414 0.0032187
SER-GLYSYN-PWY: superpathway of L-serine and glycine biosynthesis I 1.049353 0.0000346
PWY-3781: aerobic respiration I (cytochrome c) -3.487774 0.0023437

HUMAnN most differentially abundant gene families

For each of these, I am initially filtering out to only P values that are below 0.01, and then am taking only the most up- or down-regulated gene families and printing them to a table.

HUMAnN2

For HUMAnN2 I am now looking at the gene families assigned to the Uniref50 database.

genes = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann/processing/genes_50.csv', header=0, index_col=0)

Blood vs saliva

Negative fold changes are higher in saliva samples while positive are higher in blood samples.

bs = genes.loc[:, ['Saliva_Blood_FC', 'Saliva_Blood_p']]
bs = bs[bs['Saliva_Blood_p'] < 0.01]
bs = bs.sort_values(by=['Saliva_Blood_FC'], ascending=False).rename(columns={'Saliva_Blood_FC':'Log2 fold change', 'Saliva_Blood_p':'p'})
bs = pd.concat([bs.head(n=50), bs.tail(n=50)])
bs.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bs %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
UniRef50_S9WFI9 6.629314 0.0001764
UniRef50_H9FCW3: Mediator of DNA damage checkpoint protein 1 (Fragment) 5.212867 0.0001696
UniRef50_H9FCS1: Mediator of DNA damage checkpoint protein 1 (Fragment) 5.185712 0.0002440
UniRef50_Q1AN79: MHC class II antigen (Fragment) 4.838595 0.0009648
UniRef50_UPI00039927F1: PREDICTED: zinc finger protein 551-like 4.685647 0.0071851
UniRef50_L5KJU1: 60S ribosomal protein L4 4.583985 0.0003444
UniRef50_X1FQ44: Marine sediment metagenome DNA, contig: S03H2_L09072 (Fragment) 4.307323 0.0003196
UniRef50_UPI00045E1407 4.156456 0.0085930
UniRef50_Q29269: Orf protein (Fragment) 4.114960 0.0010203
UniRef50_V8PE67: Tyrosine-protein phosphatase non-receptor type 11 (Fragment) 3.962689 0.0019819
UniRef50_H2PP93 3.712350 0.0091976
UniRef50_UPI0003ABAA48: PREDICTED: olfactory receptor 5F1-like 3.710408 0.0097286
UniRef50_Q1LZJ1: Arl4c protein (Fragment) 3.627737 0.0050527
UniRef50_M9WM38: Heat shock cognate 70 isoform 2 (Fragment) 3.598336 0.0003457
UniRef50_UPI0003AB5B64 3.551438 0.0043191
UniRef50_F8LAC9 3.533322 0.0009125
UniRef50_J9E765 3.377193 0.0000095
UniRef50_UPI000387AFAD: PREDICTED: NADH-ubiquinone oxidoreductase chain 5-like isoform X1 3.339387 0.0012347
UniRef50_UPI0002ADB57A 3.313237 0.0067151
UniRef50_I0EZ67: Mucin 3A (Fragment) 3.306479 0.0096600
UniRef50_UPI0003AB8CC4 3.305078 0.0036731
UniRef50_UPI0003EB058D: PREDICTED: NADH-ubiquinone oxidoreductase chain 5-like isoform X2 3.281868 0.0038307
UniRef50_UPI0003EB002A 3.229292 0.0019300
UniRef50_UPI0003EAFECD: PREDICTED: mucin-3A-like, partial 3.210186 0.0055594
UniRef50_A0A016SQN7 3.125134 0.0045341
UniRef50_V4LU59 3.021396 0.0009296
UniRef50_UPI00045DC74A: PREDICTED: mucin-3A-like 2.962924 0.0042566
UniRef50_UPI00045DA955 2.930116 0.0017526
UniRef50_A0A024QYZ5: HCG1796935, isoform CRA_a 2.913677 0.0041174
UniRef50_R4GJT7 2.900382 0.0002836
UniRef50_B4DF53: cDNA FLJ53551, highly similar to Zinc finger protein 248 2.897411 0.0000542
UniRef50_B7P3Z5 2.889594 0.0019244
UniRef50_Q5U4M3: LOC495467 protein (Fragment) 2.850899 0.0031476
UniRef50_M3ZCM1 2.828006 0.0012047
UniRef50_UPI0003EB02DB: PREDICTED: homeobox protein aristaless-like 2.823574 0.0022403
UniRef50_A7RSV6: Predicted protein 2.817247 0.0039070
UniRef50_F6QNF7 2.800879 0.0069183
UniRef50_UPI00046B6145: PREDICTED: putative double homeobox protein 2, partial 2.763190 0.0002367
UniRef50_UPI0003EAF13E: PREDICTED: LOW QUALITY PROTEIN: mucin-3A, partial 2.751814 0.0054069
UniRef50_M3ZBB0 2.728827 0.0010611
UniRef50_UPI00045DF71E: PREDICTED: LOW QUALITY PROTEIN: double homeobox protein 4C-like 2.725843 0.0019888
UniRef50_UPI00020AF5A5: PREDICTED: double homeobox protein 4-like protein 4-like 2.723618 0.0018227
UniRef50_M3ZBZ2 2.703242 0.0009854
UniRef50_W4HSC3 2.689705 0.0006538
UniRef50_G7K0E8 2.687715 0.0000253
UniRef50_C6GLY9 2.685302 0.0043274
UniRef50_X1Q312: Marine sediment metagenome DNA, contig: S06H3_S28886 (Fragment) 2.684738 0.0006639
UniRef50_M3Z906 2.675682 0.0009384
UniRef50_UPI0003EAE9FB: PREDICTED: double homeobox protein 4-like protein 4-like 2.652845 0.0020827
UniRef50_G3SIN0 2.643185 0.0023509
UniRef50_D3A4C9 -13.989167 0.0000005
UniRef50_Q15662: Transformation-related protein (Fragment) -14.023106 0.0000000
UniRef50_R5BM51: Rubredoxin -14.096789 0.0000004
UniRef50_K0ZJH0 -14.097575 0.0000000
UniRef50_I1YR15: PF14128 domain protein -14.149381 0.0000003
UniRef50_A5TSU5 -14.163944 0.0000028
UniRef50_F0ET79 -14.215695 0.0000093
UniRef50_F2KZW6 -14.258185 0.0000000
UniRef50_D1BML4 -14.260063 0.0000000
UniRef50_F8WU95 -14.261922 0.0001824
UniRef50_V8M0W9 -14.277070 0.0000359
UniRef50_UPI0002743795 -14.306087 0.0000000
UniRef50_K4AUT0 -14.342607 0.0000000
UniRef50_D3I623 -14.345769 0.0003207
UniRef50_UPI0001D54BAB: PREDICTED: hypothetical protein LOC100423797 -14.350340 0.0000000
UniRef50_E6MSA0 -14.415821 0.0000067
UniRef50_D9RRX3 -14.416132 0.0000000
UniRef50_C7T763: Conserved protein -14.437388 0.0000000
UniRef50_F9EJV5 -14.437395 0.0000112
UniRef50_Q67JS5: 50S ribosomal protein L33 -14.443151 0.0000002
UniRef50_F8WU92 -14.474993 0.0000125
UniRef50_E3H1F1 -14.493173 0.0002929
UniRef50_B2DWT6 -14.570674 0.0000000
UniRef50_Q5R4Z9 -14.596096 0.0000000
UniRef50_D9RWB8 -14.630906 0.0000004
UniRef50_E7EQT9: C4b-binding protein beta chain -14.693200 0.0000000
UniRef50_T0V883: Peptide deformylase -14.703710 0.0000000
UniRef50_Q0SNB1: 50S ribosomal protein L33 -14.703815 0.0000000
UniRef50_F5XS62 -14.882678 0.0000000
UniRef50_D4CVQ8 -14.890000 0.0000432
UniRef50_R5FC77 -14.930794 0.0002078
UniRef50_J7SHX6 -14.994454 0.0000000
UniRef50_L1N274 -15.056507 0.0000064
UniRef50_Q5FHQ2: 50S ribosomal protein L34 -15.113213 0.0000002
UniRef50_P66235: 50S ribosomal protein L33 3 -15.212730 0.0000000
UniRef50_K3Z2X2 -15.315930 0.0000018
UniRef50_R6WM19 -15.362005 0.0000000
UniRef50_Q045W4: 50S ribosomal protein L33 1 -15.445991 0.0000000
UniRef50_Q8PJA2 -15.464577 0.0000000
UniRef50_Q9HC73-2: Isoform 2 of Cytokine receptor-like factor 2 -15.472136 0.0000000
UniRef50_A5TWX0 -15.507820 0.0000080
UniRef50_G6AI72 -15.763796 0.0000001
UniRef50_UPI00029DC723 -15.788376 0.0000000
UniRef50_R0ENV9: Exodeoxyribonuclease III (Xth) -16.027757 0.0000000
UniRef50_UPI00027F211C -16.055269 0.0000000
UniRef50_UPI0002ADA9E1 -16.197589 0.0000000
UniRef50_UPI000273E20F -16.210068 0.0000000
UniRef50_UPI00045D7F14 -16.483538 0.0000000
UniRef50_A4VYJ1 -16.776385 0.0000000
UniRef50_unknown -18.595382 0.0010287

Blood vs buccal

Negative fold changes are higher in buccal samples while positive are higher in blood samples.

bb = genes.loc[:, ['Buccal_Blood_FC', 'Buccal_Blood_p']]
bb = bb[bb['Buccal_Blood_p'] < 0.01]
bb= bb.sort_values(by=['Buccal_Blood_FC'], ascending=False).rename(columns={'Buccal_Blood_FC':'Log2 fold change', 'Buccal_Blood_p':'p'})
bb = pd.concat([bb.head(n=50), bb.tail(n=50)])
bb.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bb %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
UniRef50_E1GF92 5.743297 0.0005166
UniRef50_Q1AN79: MHC class II antigen (Fragment) 3.563088 0.0007322
UniRef50_X1FQ44: Marine sediment metagenome DNA, contig: S03H2_L09072 (Fragment) 3.336042 0.0002325
UniRef50_L5KJU1: 60S ribosomal protein L4 3.084726 0.0031901
UniRef50_F6W0F5 2.753011 0.0036775
UniRef50_H2PP93 2.673160 0.0071481
UniRef50_H9FCS1: Mediator of DNA damage checkpoint protein 1 (Fragment) 2.524942 0.0077217
UniRef50_F8LAC9 2.287875 0.0047304
UniRef50_L5LZ23: Olfactory receptor 51G1 2.227058 0.0010465
UniRef50_G3UWI9: SMT3 suppressor of mif two 3 homolog 3 (Yeast), isoform CRA_c 2.072888 0.0097392
UniRef50_F7HKI0: Peptidyl-prolyl cis-trans isomerase (Fragment) 2.058932 0.0041232
UniRef50_H0XH74 2.034486 0.0036756
UniRef50_Q4Y9X3: Ferlin like protein, putative (Fragment) 1.921880 0.0092333
UniRef50_F7I766: Taste receptor type 2 1.849345 0.0066352
UniRef50_M5FK34: Olfactory receptor 1.831726 0.0027632
UniRef50_F8L9X4 1.813557 0.0048592
UniRef50_M9WM38: Heat shock cognate 70 isoform 2 (Fragment) 1.787270 0.0037311
UniRef50_UPI000387D4FF: PREDICTED: spermidine/spermine N(1)-acetyltransferase-like protein 1-like isoform X1 1.773936 0.0039887
UniRef50_UPI0003EAEEA6: PREDICTED: spermidine/spermine N(1)-acetyltransferase-like protein 1-like isoform X1 1.731149 0.0044790
UniRef50_UPI00046B9C67: PREDICTED: zinc finger protein 239-like 1.706392 0.0052273
UniRef50_H2PLR3 1.690212 0.0011983
UniRef50_UPI0003AB5B64 1.687717 0.0092839
UniRef50_Q646G0: Taste receptor type 2 member 46 1.658515 0.0025296
UniRef50_UPI00045DA955 1.657828 0.0015369
UniRef50_P06310: Ig kappa chain V-II region RPMI 6410 1.653949 0.0039865
UniRef50_J3QQQ9: KRAB-A domain-containing protein 2 1.652828 0.0012095
UniRef50_G3RNZ9 1.643521 0.0036612
UniRef50_Q8WMI5: Protocadherin beta 3’ (Fragment) 1.639896 0.0042772
UniRef50_H9K9F5 1.586861 0.0066564
UniRef50_B4DF53: cDNA FLJ53551, highly similar to Zinc finger protein 248 1.577706 0.0020450
UniRef50_F8W7C6: 60S ribosomal protein L10 1.573421 0.0026586
UniRef50_UPI00046B6145: PREDICTED: putative double homeobox protein 2, partial 1.544664 0.0084687
UniRef50_L9L4H1: High mobility group protein B1 1.529451 0.0014162
UniRef50_G5AQ84: Olfactory receptor 1.493635 0.0085284
UniRef50_F7A8K2 1.476652 0.0065107
UniRef50_P56545: C-terminal-binding protein 2 1.442803 0.0002386
UniRef50_Q2VAZ9: Spd4 1.431028 0.0091354
UniRef50_Q91YZ2: C-terminal binding protein 2, isoform CRA_b 1.427224 0.0004516
UniRef50_G3QH92 1.414881 0.0018520
UniRef50_UPI0003EAF13E: PREDICTED: LOW QUALITY PROTEIN: mucin-3A, partial 1.409414 0.0086321
UniRef50_I7GIU3: Macaca fascicularis brain cDNA clone: QflA-22647, similar to human DAZ interacting protein 1 (DZIP1), mRNA, RefSeq: NM_198968.1 1.404076 0.0051347
UniRef50_UPI0003EB002A 1.401988 0.0001113
UniRef50_UPI00045D9A97: PREDICTED: protocadherin beta-17 1.392696 0.0056254
UniRef50_P27635: 60S ribosomal protein L10 1.389956 0.0063634
UniRef50_C3Z1N2 1.381111 0.0083114
UniRef50_Q95M20: Envelope glycoprotein (Fragment) 1.366104 0.0002640
UniRef50_UPI000387AFAD: PREDICTED: NADH-ubiquinone oxidoreductase chain 5-like isoform X1 1.343646 0.0000548
UniRef50_UPI0003AB8CC4 1.315363 0.0002565
UniRef50_G0WMZ3: Histone H3 (Fragment) 1.300715 0.0033259
UniRef50_L7MA99: Putative integrin beta-like 1 (Fragment) 1.262451 0.0023312
UniRef50_P65239: Ribose-phosphate pyrophosphokinase 1 -12.532742 0.0000233
UniRef50_B5E2F8 -12.542684 0.0000179
UniRef50_G0IBT1: Integral membrane protein -12.547501 0.0000001
UniRef50_A3CRC4 -12.567026 0.0000885
UniRef50_Q74IL5: 50S ribosomal protein L27 -12.606708 0.0000154
UniRef50_Q8CZ89: UPF0297 protein spr0175 -12.610265 0.0002538
UniRef50_B2IN20 -12.614462 0.0000135
UniRef50_P66854: Single-stranded DNA-binding protein -12.626927 0.0001714
UniRef50_G0IAJ9 -12.754517 0.0000488
UniRef50_A8AXP8: 30S ribosomal protein S21 -12.781951 0.0000017
UniRef50_Q49UP7 -12.789827 0.0000005
UniRef50_T1Z2R4: Acyl carrier protein -12.793585 0.0001978
UniRef50_B1SBJ3 -12.839826 0.0000085
UniRef50_Q03IY7: Transposase -12.868076 0.0005401
UniRef50_P31306: Oligopeptide-binding protein SarA -12.869272 0.0000136
UniRef50_B1I837: UPF0356 protein SPH_0238 -12.870009 0.0000048
UniRef50_P64405 -12.870288 0.0000168
UniRef50_Q18CG9: 50S ribosomal protein L29 -12.873797 0.0000004
UniRef50_A4W3V5 -12.876910 0.0000098
UniRef50_A0A023VL47 -12.882949 0.0000243
UniRef50_Q6ZPA4: CDNA FLJ26171 fis, clone ADG03656 -12.907779 0.0000000
UniRef50_T0V883: Peptide deformylase -12.939411 0.0000001
UniRef50_M5MR97 -13.022733 0.0000019
UniRef50_M7MB60: Transcriptional regulator ComX1 -13.027114 0.0002164
UniRef50_Q035A1: 50S ribosomal protein L30 -13.029991 0.0000105
UniRef50_D6ZNE8 -13.066363 0.0000146
UniRef50_Q9NWI4: CDNA FLJ20837 fis, clone ADKA02602 -13.183296 0.0000000
UniRef50_A7HM29: Translation initiation factor IF-1 -13.221044 0.0001248
UniRef50_Q0TMQ9: 30S ribosomal protein S14 type Z -13.241974 0.0000244
UniRef50_UPI000387C557: PREDICTED: transmembrane protein 14C isoform X1 -13.307812 0.0000000
UniRef50_B5XL02 -13.315332 0.0000014
UniRef50_M4KYC9 -13.328328 0.0000200
UniRef50_Q88WK6: 50S ribosomal protein L28 -13.395329 0.0000037
UniRef50_A4VUC9 -13.546590 0.0000081
UniRef50_J7SHX6 -13.774661 0.0005302
UniRef50_H7QSD9 -13.806682 0.0009866
UniRef50_Q8NHA8: Olfactory receptor 1F12 -13.875289 0.0000000
UniRef50_UPI0002BB9C7E: hypothetical protein, partial -13.983423 0.0000037
UniRef50_UPI00027F40D7: PREDICTED: zinc finger protein DPF3-like -14.225737 0.0000000
UniRef50_K4AUT0 -14.330889 0.0000028
UniRef50_M1YD77 -14.345829 0.0006540
UniRef50_K0ZJH0 -14.646890 0.0000000
UniRef50_UPI0001D54BAB: PREDICTED: hypothetical protein LOC100423797 -14.710495 0.0000000
UniRef50_A4VYJ1 -16.099654 0.0007280
UniRef50_unknown -16.342427 0.0025047
UniRef50_UPI00027F211C -16.835101 0.0000000
UniRef50_Q8PJA2 -17.042285 0.0000000
UniRef50_UPI00045D7F14 -17.107391 0.0000000
UniRef50_B2DWT6 -17.108883 0.0000089
UniRef50_R0ENV9: Exodeoxyribonuclease III (Xth) -17.933310 0.0000000

Saliva vs buccal

Negative fold changes are higher in saliva samples while positive are higher in buccal samples.

sb = genes.loc[:, ['Saliva_Buccal_FC', 'Saliva_Buccal_p']]
sb = sb[sb['Saliva_Buccal_p'] < 0.01]
sb= sb.sort_values(by=['Saliva_Buccal_FC'], ascending=False).rename(columns={'Saliva_Buccal_FC':'Log2 fold change', 'Saliva_Buccal_p':'p'})
sb = pd.concat([sb.head(n=50), sb.tail(n=50)])
sb.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$sb %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
UniRef50_F2UXX5 7.315803 0.0099579
UniRef50_E2ANL9: Calcium-transporting ATPase sarcoplasmic/endoplasmic reticulum type (Fragment) 6.908305 0.0000913
UniRef50_L8ECE5: Alternative protein CELSR1 6.343497 0.0004922
UniRef50_UPI0000D9EED1: PREDICTED: intermediate conductance calcium-activated potassium channel protein 4-like 5.954222 0.0002641
UniRef50_B2GFL3: Thiamine-monophosphate kinase 5.914369 0.0079731
UniRef50_F9Q0X0: DNA polymerase III, beta subunit 5.903499 0.0043347
UniRef50_E8JV65 5.769518 0.0009157
UniRef50_Q28933: Insulin-like growth factor I (Fragment) 5.514441 0.0001828
UniRef50_R0P4C8: ABC transporter ATP-binding protein 5.499301 0.0001294
UniRef50_Q9NPP4-4: Isoform 4 of NLR family CARD domain-containing protein 4 5.462684 0.0005657
UniRef50_W5L103 5.415375 0.0004958
UniRef50_F0I1V1 5.405717 0.0041094
UniRef50_Q9H521 5.405141 0.0011870
UniRef50_A2D556: HDAC9 (Fragment) 5.277441 0.0008590
UniRef50_Q4T0Y0: Chromosome undetermined SCAF10794, whole genome shotgun sequence. (Fragment) 5.265559 0.0005947
UniRef50_R0N8S7: GTP cyclohydrolase I 5.158222 0.0008652
UniRef50_F7H3X2 4.861689 0.0011155
UniRef50_F9PM29: Peptidyl-tRNA hydrolase PTH2 4.835777 0.0066279
UniRef50_F2V0X4 4.817801 0.0025800
UniRef50_F8VU50: Cytochrome P450 4F12 (Fragment) 4.721608 0.0026254
UniRef50_O18892: Neuroendocrine-specific protein (Fragment) 4.628135 0.0007919
UniRef50_S7MTM3: Insulin growth factor-like family member 3 4.571237 0.0001902
UniRef50_M5BF77: 5-hydroxytryptamine (Serotonin) receptor 7a (Fragment) 4.436518 0.0001619
UniRef50_A7S0H5: Predicted protein (Fragment) 4.397030 0.0046533
UniRef50_Q6P1D3: Kif3b protein 4.377647 0.0013682
UniRef50_G9L0C0: Cyclin-dependent kinase 2-associated protein 2-like protein (Fragment) 4.329003 0.0097144
UniRef50_UPI00039402E6: PREDICTED: complement C1q tumor necrosis factor-related protein 1-like 4.314720 0.0012100
UniRef50_E3H3Y7 4.314125 0.0077289
UniRef50_H0YN86: DNA-binding protein SATB1 4.215396 0.0000863
UniRef50_E0PRL0 4.175238 0.0063785
UniRef50_G3V3U3: Gamma-aminobutyric acid receptor subunit beta-3 4.158766 0.0017780
UniRef50_M1EMH1: Differentially expressed in FDCP 8-like protein (Fragment) 4.137598 0.0034429
UniRef50_UPI00046B4812: PREDICTED: voltage-dependent calcium channel gamma-5 subunit-like 4.126605 0.0025449
UniRef50_F3PD35: Conserved domain protein 4.098196 0.0079865
UniRef50_E3GZW8 3.986665 0.0059652
UniRef50_P16952: Agglutinin receptor 3.960138 0.0012740
UniRef50_L9K3Q1: 40S ribosomal protein S7 3.925397 0.0098665
UniRef50_J0WNZ4: PF11377 family protein 3.902408 0.0088429
UniRef50_F2UXV1 3.898271 0.0035723
UniRef50_E3H5T2 3.871498 0.0099899
UniRef50_Q6ZWH7: cDNA FLJ41054 fis, clone STOMA1000189 3.866233 0.0053689
UniRef50_F2QAI7: Cytochrome b (Fragment) 3.846908 0.0038612
UniRef50_E9PSF1: RNA-binding protein 38 3.823172 0.0037154
UniRef50_UPI0003ACC708: PREDICTED: glutamine-rich protein 2-like 3.821570 0.0058664
UniRef50_L5KK24 3.813490 0.0031550
UniRef50_L8E857: Alternative protein ZC3H13 3.799492 0.0089465
UniRef50_UPI0003C12040: PREDICTED: zinc finger protein 280B-like 3.735111 0.0080621
UniRef50_A8AWT9 3.702641 0.0086479
UniRef50_B3GS42: Adrenergic receptor alpha-2A (Fragment) 3.678312 0.0008552
UniRef50_I3LCW0 3.673261 0.0083240
UniRef50_D3A6F4 -12.353034 0.0001144
UniRef50_E6KT76 -12.357022 0.0025087
UniRef50_A0A015Z1A5 -12.366172 0.0085094
UniRef50_C0EPA9 -12.413849 0.0000022
UniRef50_A7FPL6: 50S ribosomal protein L34 -12.451899 0.0000017
UniRef50_C5TLN7 -12.492172 0.0000091
UniRef50_I1YX70 -12.524738 0.0000131
UniRef50_J1LR62 -12.536216 0.0000000
UniRef50_F9DLQ2 -12.544585 0.0019333
UniRef50_F8WUF4 -12.556291 0.0002143
UniRef50_L9PWG5 -12.580060 0.0010930
UniRef50_I1YT36: Protein translocase subunit SecE -12.606614 0.0035617
UniRef50_I1ZK53 -12.632691 0.0000000
UniRef50_U3QCK3 -12.682848 0.0046113
UniRef50_F9DEH1 -12.685854 0.0000004
UniRef50_R5KBV9 -12.687679 0.0005809
UniRef50_D1QVL3 -12.709784 0.0000003
UniRef50_E6MPR7 -12.718797 0.0000013
UniRef50_F0F573 -12.724685 0.0069584
UniRef50_R5ZUD4 -12.735028 0.0047915
UniRef50_Q8R9W1: Acyl carrier protein -12.788626 0.0013549
UniRef50_D3A777 -12.820328 0.0000004
UniRef50_D3A200 -12.868877 0.0000001
UniRef50_D4TYA4 -12.871035 0.0068930
UniRef50_L1PXF5 -12.911877 0.0022917
UniRef50_D2ZXW7 -13.000393 0.0006094
UniRef50_E3H7K8: Protein translocase subunit SecE -13.004207 0.0076877
UniRef50_U5Q4G7 -13.040389 0.0011199
UniRef50_D3A4M7 -13.058006 0.0000000
UniRef50_D1QU93 -13.129398 0.0000007
UniRef50_G1VIN1 -13.180683 0.0000012
UniRef50_C9MRL3 -13.192778 0.0020007
UniRef50_R6EH14 -13.327954 0.0000030
UniRef50_D9RWE8 -13.350595 0.0000003
UniRef50_A7B8U2 -13.435091 0.0002265
UniRef50_J2ZDR2 -13.467517 0.0000000
UniRef50_D4CWL6 -13.473774 0.0041463
UniRef50_F9DEP4 -13.474756 0.0000000
UniRef50_F9DFJ4 -13.505607 0.0000008
UniRef50_E7RTC7 -13.520921 0.0000141
UniRef50_E1L0U7 -13.531869 0.0008408
UniRef50_C3WV96 -13.802692 0.0000199
UniRef50_D3A4C9 -13.989167 0.0000005
UniRef50_F0ET79 -14.215695 0.0005314
UniRef50_F2KZW6 -14.258185 0.0000000
UniRef50_F8WU95 -14.261922 0.0046479
UniRef50_UPI0002743795 -14.306087 0.0000000
UniRef50_Q67JS5: 50S ribosomal protein L33 -14.443151 0.0060342
UniRef50_L1N274 -15.056507 0.0000064
UniRef50_Q5FHQ2: 50S ribosomal protein L34 -15.113213 0.0018994

Saliva vs saliva_euk

Negative fold changes are higher in saliva samples while positive are higher in saliva_euk samples.

ss = genes.loc[:, ['Saliva_Saliva_euk_FC', 'Saliva_Saliva_euk_p']]
ss = ss[ss['Saliva_Saliva_euk_p'] < 0.01]
ss = ss.sort_values(by=['Saliva_Saliva_euk_FC'], ascending=False).rename(columns={'Saliva_Saliva_euk_FC':'Log2 fold change', 'Saliva_Saliva_euk_p':'p'})
ss = pd.concat([ss.head(n=50), ss.tail(n=50)])
ss.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$ss %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
UniRef50_Q2EG36: Zonadhesin variant 6 (Fragment) 6.736133 0.0003704
UniRef50_E0CZ07: Mucolipin-1 (Fragment) 6.325679 0.0059547
UniRef50_G3IFH3: IQ and AAA domain-containing protein 1 5.615730 0.0066785
UniRef50_J3QRX4: Coordinator of PRMT5 and differentiation stimulator 5.404396 0.0099960
UniRef50_G9KLG5: Ring finger protein 183 (Fragment) 5.299399 0.0033525
UniRef50_M3WRY0 5.271822 0.0032388
UniRef50_J0LF75: CDP-alcohol phosphatidyltransferase 4.973907 0.0072992
UniRef50_J4SN93 4.912408 0.0064539
UniRef50_UPI0002A4C9E1: PREDICTED: serine/arginine-rich splicing factor 9-like isoform 2 4.853556 0.0038099
UniRef50_X6DTB7 4.724411 0.0014521
UniRef50_D2I8R3 4.415273 0.0034481
UniRef50_Q4RB69: Chromosome undetermined SCAF22150, whole genome shotgun sequence. (Fragment) 4.362065 0.0036359
UniRef50_W5NRZ1 4.306406 0.0047186
UniRef50_Q9NYJ1: Cytochrome c oxidase assembly factor 4 homolog, mitochondrial 4.273274 0.0011101
UniRef50_UPI000333A01E: PREDICTED: cytochrome c oxidase assembly factor 4 homolog, mitochondrial-like 4.149633 0.0007600
UniRef50_Q01BQ0: WGS project CAID00000000 data, contig chromosome 03 3.937558 0.0070409
UniRef50_C6SFZ1: Glycine dehydrogenase 3.760227 0.0086859
UniRef50_F0GZ41: Putative neutral zinc metallopeptidase 3.714295 0.0038094
UniRef50_F7CTS8 3.680199 0.0021871
UniRef50_E2B2T0 3.645847 0.0076141
UniRef50_K9KGU3: Zinc finger E-box-binding homeobox 2-like protein (Fragment) 3.589550 0.0045042
UniRef50_UPI000273E760: PREDICTED: insulin receptor-like 3.543139 0.0026163
UniRef50_UPI000441E382: PREDICTED: protein shisa-2 homolog 3.542102 0.0064692
UniRef50_UPI0003EAEF90: PREDICTED: zinc finger protein 77 isoform X1 3.530398 0.0085635
UniRef50_UPI00042AEC10: PREDICTED: protein shisa-2 homolog, partial 3.528187 0.0077543
UniRef50_Q4X8J5 3.087042 0.0096098
UniRef50_W5KCI9 3.006566 0.0070717
UniRef50_C4J898 2.993080 0.0030798
UniRef50_Q5U4M3: LOC495467 protein (Fragment) 2.850899 0.0031476
UniRef50_A7RSV6: Predicted protein 2.817247 0.0049236
UniRef50_U6DD35: FZD2 protein (Fragment) 2.808793 0.0012733
UniRef50_H3C2D3 2.743496 0.0029913
UniRef50_W7F324: 40S ribosomal protein S16 2.687886 0.0008769
UniRef50_A0A024R088: HCG1995470, isoform CRA_a (Fragment) 2.638056 0.0025685
UniRef50_H2RDM6 2.624960 0.0085736
UniRef50_G3IHF2: Putative proline-rich protein 21 2.519286 0.0059569
UniRef50_E2B5E4 2.480695 0.0068582
UniRef50_K0G5V9: Ribosomal protein L31-like protein 2.417748 0.0015082
UniRef50_H2L3C4 2.322147 0.0036513
UniRef50_R4G9V3 2.298731 0.0001725
UniRef50_C6GLY9 2.287017 0.0061704
UniRef50_UPI0003D080C3: PREDICTED: interferon alpha-1-like 2.184722 0.0053331
UniRef50_UPI00044221BE: PREDICTED: ribosome-binding protein 1-like 2.164684 0.0030225
UniRef50_H6TPI0: Translation elongation factor 1-alpha (Fragment) 2.079082 0.0021020
UniRef50_Q8PJA2 2.060768 0.0053642
UniRef50_UPI0003D0B7A4: PREDICTED: zinc finger protein 629-like 2.060686 0.0051971
UniRef50_UPI00046B6145: PREDICTED: putative double homeobox protein 2, partial 2.059583 0.0001653
UniRef50_Q9H492: Microtubule-associated proteins 1A/1B light chain 3A 2.039716 0.0092642
UniRef50_E5T9Q2 2.018052 0.0089603
UniRef50_UPI0003C26DC2: PREDICTED: zinc finger protein 436-like 2.010972 0.0056649
UniRef50_D4TZD9 -10.002404 0.0009972
UniRef50_K0YV53 -10.008045 0.0026087
UniRef50_E6MPZ2 -10.022918 0.0053824
UniRef50_J1BCC5: Putative lipoprotein -10.049852 0.0067986
UniRef50_C6R3D9 -10.055446 0.0044989
UniRef50_W1WGH3 -10.078205 0.0047290
UniRef50_L9PX91 -10.088781 0.0001431
UniRef50_D6LCM2 -10.095292 0.0006760
UniRef50_M5ICP3 -10.101189 0.0017222
UniRef50_E6MQB8 -10.102489 0.0012765
UniRef50_E6MLR7 -10.105446 0.0000378
UniRef50_E6MPQ7 -10.135183 0.0074584
UniRef50_M5LV42 -10.177302 0.0016998
UniRef50_T0V181 -10.194074 0.0053965
UniRef50_E6MP39 -10.210365 0.0000731
UniRef50_S2ZVU4 -10.229454 0.0081521
UniRef50_D4U2D1 -10.387840 0.0037682
UniRef50_I0T894 -10.391620 0.0000002
UniRef50_D1QS27 -10.443088 0.0000015
UniRef50_J0SXJ1 -10.455402 0.0095049
UniRef50_G6QX35: Lysyl-tRNA synthetase -10.465026 0.0010429
UniRef50_C2KZM8 -10.502725 0.0006551
UniRef50_Q9HTK8: Rubredoxin-2 -10.528115 0.0097021
UniRef50_J1B8M1 -10.540917 0.0023392
UniRef50_E1M6H0: L-asparaginase -10.571133 0.0028062
UniRef50_K2CG05 -10.655385 0.0000000
UniRef50_E6MML7 -10.787475 0.0041259
UniRef50_A4VUC9 -10.797531 0.0000027
UniRef50_E6MPU9 -10.813932 0.0000011
UniRef50_C6SPQ1 -10.875661 0.0007018
UniRef50_C6R611 -10.909512 0.0010026
UniRef50_G6ADU9 -11.040957 0.0069180
UniRef50_A7ZFG7 -11.063752 0.0017442
UniRef50_E6ML90 -11.289791 0.0000608
UniRef50_E8SPK9 -11.291495 0.0000004
UniRef50_U2L5W8 -11.370382 0.0035340
UniRef50_G0I7S2 -11.400386 0.0019163
UniRef50_E6MRQ0 -11.409751 0.0094672
UniRef50_E2ZDE4 -11.421551 0.0000102
UniRef50_E6MT18 -11.452743 0.0000023
UniRef50_F2CNP3 -11.531533 0.0000004
UniRef50_E6MTN0 -11.619507 0.0000024
UniRef50_J0SWN8 -11.709602 0.0000015
UniRef50_E6MRC7 -11.800971 0.0000019
UniRef50_J1BCP8 -11.883256 0.0000208
UniRef50_E6MN88 -12.108664 0.0000075
UniRef50_M0R3A6: Fms-related tyrosine kinase 3 ligand (Fragment) -12.187634 0.0000000
UniRef50_E6KT76 -12.357022 0.0000047
UniRef50_B1IBD3: 50S ribosomal protein L33 1 -13.286300 0.0095733
UniRef50_E7RTC7 -13.520921 0.0000141

Buccal vs buccal_euk

Negative fold changes are higher in buccal samples while positive are higher in buccal_euk samples.

bbe = genes.loc[:, ['Buccal_Buccal_euk_FC', 'Buccal_Buccal_euk_p']]
bbe = bbe[bbe['Buccal_Buccal_euk_p'] < 0.01]
bbe = bbe.sort_values(by=['Buccal_Buccal_euk_FC'], ascending=False).rename(columns={'Buccal_Buccal_euk_FC':'Log2 fold change', 'Buccal_Buccal_euk_p':'p'})
bbe = pd.concat([bbe.head(n=50), bbe.tail(n=50)])
bbe = bbe.groupby(by=bbe.index, axis=0).mean()
bbe.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bbe %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
UniRef50_UPI0003D0E142: PREDICTED: exportin-6-like isoform X1 8.116484 0.0008299
UniRef50_F8WDA5: Nitrogen permease regulator 3-like protein 7.930798 0.0000000
UniRef50_D1M744: Gamma-aminobutyric acid A receptor beta 2 (Fragment) 7.386372 0.0045171
UniRef50_F8W7P5: Protein FAM92A1 6.842389 0.0025446
UniRef50_UPI00045DE035: PREDICTED: cadherin-10, partial 6.782436 0.0010449
UniRef50_C3ZX87 6.695182 0.0000021
UniRef50_L8HKY8 6.579498 0.0058675
UniRef50_F6T035 6.358616 0.0000631
UniRef50_H0YES9: ATP-sensitive inward rectifier potassium channel 11 (Fragment) 6.290137 0.0019962
UniRef50_W1W7N6: Formate acetyltransferase 5.909258 0.0020171
UniRef50_B3PMQ8: Hypothetical lipoprotein 5.692805 0.0000004
UniRef50_G7P543 5.658893 0.0090239
UniRef50_S9W349: Ribosomal protein L10 isoform 4-like protein 5.391158 0.0036590
UniRef50_C4J8P5 5.167383 0.0049123
UniRef50_UPI00042AD1E7: PREDICTED: disintegrin and metalloproteinase domain-containing protein 20-like, partial 5.076077 0.0078819
UniRef50_S4HKA8: LPXTG-motif protein cell wall anchor domain protein (Fragment) 4.634721 0.0000239
UniRef50_Q8N0Z0: Seven transmembrane helix receptor 4.523008 0.0022326
UniRef50_A0A016UU23 3.369483 0.0024479
UniRef50_F6WD18 2.869978 0.0024296
UniRef50_E2BJ09 2.808241 0.0060241
UniRef50_F8WAS3: NADH dehydrogenase [ubiquinone] 1 alpha subcomplex subunit 5 2.519171 0.0003033
UniRef50_H2R246 2.371328 0.0013049
UniRef50_U3JCC1 2.362967 0.0023879
UniRef50_UPI00020E5039: PREDICTED: golgin subfamily A member 8A-like 2.171549 0.0075876
UniRef50_UPI0002038DEF: PREDICTED: zinc finger protein 84-like 2.149222 0.0062307
UniRef50_D3ZHE0 2.046962 0.0036935
UniRef50_G5B3Z4: Olfactory receptor 8H1 2.009480 0.0084992
UniRef50_D1KFN6: Olfactory receptor (Fragment) 1.993650 0.0022319
UniRef50_Q32Q92-2: Isoform 2 of Acyl-coenzyme A thioesterase 6 1.969015 0.0080379
UniRef50_L9JBM4: Olfactory receptor 1.892725 0.0033091
UniRef50_I7G5N0: Macaca fascicularis brain cDNA, clone: QflA-18143 1.889867 0.0029983
UniRef50_UPI000387D707: PREDICTED: immunoglobulin superfamily member 3-like 1.873711 0.0008035
UniRef50_UPI00042C6F73: PREDICTED: immunoglobulin superfamily member 3-like, partial 1.783043 0.0002690
UniRef50_UPI0004542214: PREDICTED: zinc finger protein 239-like, partial 1.761047 0.0090689
UniRef50_M0R1E6 1.726397 0.0071132
UniRef50_UPI00046B9C67: PREDICTED: zinc finger protein 239-like 1.706392 0.0085283
UniRef50_L5KJS6: Polycystin-1 (Fragment) 1.692470 0.0064700
UniRef50_G5C4N0: ATP synthase lipid-binding protein, mitochondrial 1.682890 0.0056619
UniRef50_L5LHR9: Eukaryotic initiation factor 4A-I 1.613836 0.0026058
UniRef50_UPI0003942425: PREDICTED: 60S ribosomal protein L13a-like 1.592437 0.0099378
UniRef50_F7FDF6 1.592109 0.0056062
UniRef50_G7Q0J4 1.588236 0.0096193
UniRef50_L5MIL4: Eukaryotic initiation factor 4A-I 1.569020 0.0079874
UniRef50_P15105: Glutamine synthetase 1.563012 0.0014998
UniRef50_M3WYY8 1.528468 0.0031577
UniRef50_L9KZ86: Polycystin-1 1.508605 0.0090114
UniRef50_UPI00042BD796: PREDICTED: zinc finger protein 850-like 1.489909 0.0035273
UniRef50_Q6DET9: Elongation factor 1-beta 1.436267 0.0053110
UniRef50_Q8VGX7: Olfactory receptor 1.407683 0.0073447
UniRef50_M7AMJ2 1.406419 0.0088378
UniRef50_D2NR32: Predicted phosphatase/phosphohexomutase -9.055643 0.0053412
UniRef50_E4XC66: Whole genome shotgun assembly, allelic scaffold set, scaffold scaffoldA_459 -9.065122 0.0047685
UniRef50_K0JQ32 -9.092686 0.0000009
UniRef50_E5RIU9: Charged multivesicular body protein 7 (Fragment) -9.108811 0.0000000
UniRef50_H6PBP7: TfoX N-terminal domain family protein -9.147272 0.0000176
UniRef50_Q8G526: 4-hydroxy-tetrahydrodipicolinate reductase -9.174536 0.0031181
UniRef50_Q82MT8: 50S ribosomal protein L31 type B 1 -9.175704 0.0030548
UniRef50_D8UNZ7 -9.192506 0.0000083
UniRef50_S4WAA0 -9.192524 0.0089845
UniRef50_P37079: Sorbitol-6-phosphate 2-dehydrogenase -9.207171 0.0096250
UniRef50_E3GZV7 -9.210175 0.0000132
UniRef50_C5C0V6: Exodeoxyribonuclease 7 small subunit -9.213471 0.0000401
UniRef50_U1R6U4 -9.307144 0.0000542
UniRef50_C0MD90 -9.326540 0.0000410
UniRef50_D2NPD9: Type IV secretory pathway, VirB11 component -9.353039 0.0076499
UniRef50_E3H441: 2-oxoglutarate dehydrogenase, E2 component, dihydrolipoamide succinyltransferase -9.384205 0.0000000
UniRef50_D6S6C8 -9.437258 0.0085244
UniRef50_A0A024DF77 -9.445438 0.0001582
UniRef50_D2NQ61: Transcriptional regulator -9.458625 0.0090883
UniRef50_Q6A6M8: 50S ribosomal protein L23 -9.465607 0.0054282
UniRef50_Q01WB8: 30S ribosomal protein S11 -9.508923 0.0056135
UniRef50_E4STI3 -9.550146 0.0001121
UniRef50_Q744X6: 30S ribosomal protein S18 1 -9.568566 0.0000144
UniRef50_A1SNJ0: 30S ribosomal protein S11 -9.590006 0.0067633
UniRef50_G5EST0 -9.608321 0.0000251
UniRef50_Q88XW2: 30S ribosomal protein S13 -9.621529 0.0061989
UniRef50_M5KMU2 -9.646129 0.0000184
UniRef50_M1DI68 -9.653628 0.0000075
UniRef50_J9P2F6 -9.655583 0.0000000
UniRef50_F9EJV5 -9.686258 0.0000166
UniRef50_E3H5A0: Molybdenum-pterin-binding protein 2 -9.693693 0.0000028
UniRef50_D8UND8 -9.729680 0.0000235
UniRef50_I6XZ28 -9.855039 0.0000000
UniRef50_B3EUK4: 50S ribosomal protein L15 -9.872957 0.0081978
UniRef50_A5TSU1 -10.024282 0.0000002
UniRef50_T0T1M2: Nitroimidazole resistance protein, putative -10.081178 0.0047272
UniRef50_A5TX05 -10.298841 0.0000168
UniRef50_I1ZN41: IS200 family transposase -10.329254 0.0072293
UniRef50_S4L0C2 -10.393216 0.0023871
UniRef50_C4L7S9: 30S ribosomal protein S10 -10.428054 0.0099966
UniRef50_A0A024DHB8: Membrane protein -10.715539 0.0061477
UniRef50_H0Y539: SHC-transforming protein 1 (Fragment) -10.885901 0.0000000
UniRef50_E4DIQ9 -10.924723 0.0001226
UniRef50_B5E5S9 -11.066803 0.0094706
UniRef50_W1XIF5 -11.316334 0.0089897
UniRef50_A5TSU5 -11.581322 0.0003144
UniRef50_B5E2F8 -12.542684 0.0064844
UniRef50_Q8N7Y7: cDNA FLJ40209 fis, clone TESTI2020999 -13.065945 0.0000000
UniRef50_UPI00045D7F14 -17.107391 0.0000000
UniRef50_R0ENV9: Exodeoxyribonuclease III (Xth) -17.933310 0.0000000

Blood vs saliva_euk

Negative fold changes are higher in blood samples while positive are higher in saliva_euk samples.

blbe = genes.loc[:, ['Blood_Saliva_euk_FC', 'Blood_Saliva_euk_p']]
blbe = blbe[blbe['Blood_Saliva_euk_p'] < 0.01]
blbe = blbe.sort_values(by=['Blood_Saliva_euk_FC'], ascending=False).rename(columns={'Blood_Saliva_euk_FC':'Log2 fold change', 'Blood_Saliva_euk_p':'p'})
blbe = pd.concat([blbe.head(n=50), blbe.tail(n=50)])
blbe = blbe.groupby(by=blbe.index, axis=0).mean()
blbe.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$blbe %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
UniRef50_Q8PJA2 17.525345 0.0000000
UniRef50_R0ENV9: Exodeoxyribonuclease III (Xth) 17.088123 0.0000000
UniRef50_unknown 16.964790 0.0025297
UniRef50_Q9NRE8: PADI-H protein 14.939340 0.0000000
UniRef50_UPI0001D54BAB: PREDICTED: hypothetical protein LOC100423797 14.758022 0.0000000
UniRef50_A4VYJ1 14.649395 0.0001501
UniRef50_Q8NHA8: Olfactory receptor 1F12 14.057480 0.0000000
UniRef50_P66235: 50S ribosomal protein L33 3 13.942107 0.0000002
UniRef50_G3RYC5 13.881953 0.0000000
UniRef50_H0YGN8: PCTP-like protein (Fragment) 13.351819 0.0000000
UniRef50_UPI000387C557: PREDICTED: transmembrane protein 14C isoform X1 13.317821 0.0000000
UniRef50_T0V883: Peptide deformylase 13.169019 0.0000004
UniRef50_R6WM19 13.108296 0.0003066
UniRef50_J7SHX6 13.103721 0.0000124
UniRef50_F7ISS9 12.969938 0.0000000
UniRef50_Q6ZPA4: CDNA FLJ26171 fis, clone ADG03656 12.873942 0.0000000
UniRef50_F5XS62 12.824878 0.0000049
UniRef50_UPI000387BEC9 12.799955 0.0000000
UniRef50_C7T763: Conserved protein 12.783316 0.0000115
UniRef50_Q71RD9: PP10764 12.779946 0.0000000
UniRef50_K4AUT0 12.644386 0.0000001
UniRef50_H0YBG6: Stress-70 protein, mitochondrial (Fragment) 12.622281 0.0000000
UniRef50_Q5FHQ2: 50S ribosomal protein L34 12.594049 0.0000069
UniRef50_A5TSU5 12.406862 0.0008557
UniRef50_V8CH43 12.385584 0.0000001
UniRef50_F9EJV5 12.306094 0.0000925
UniRef50_Q6YL41 12.219578 0.0000000
UniRef50_O83217: Elongation factor Tu 12.147498 0.0000005
UniRef50_I1YR15: PF14128 domain protein 12.096193 0.0000005
UniRef50_H2PQZ3 12.030992 0.0000000
UniRef50_C4L7S9: 30S ribosomal protein S10 11.916521 0.0000430
UniRef50_Q73JJ6: 50S ribosomal protein L7/L12 11.879895 0.0012062
UniRef50_C9MRL3 11.879281 0.0000007
UniRef50_D3ID43 11.817884 0.0000043
UniRef50_A4SCS1: 50S ribosomal protein L5 11.780822 0.0000021
UniRef50_P25461: 50S ribosomal protein L33, chloroplastic 11.644994 0.0000012
UniRef50_M1YD77 11.638134 0.0000649
UniRef50_Q73WG1: Serine hydroxymethyltransferase 11.608425 0.0000180
UniRef50_Q28UY2: 50S ribosomal protein L11 11.607036 0.0000170
UniRef50_Q7WK82: ATP-dependent Clp protease ATP-binding subunit ClpX 11.574924 0.0000125
UniRef50_B1ZNE5: 50S ribosomal protein L2 11.564315 0.0002025
UniRef50_O31678: NADPH-dependent 7-cyano-7-deazaguanine reductase 11.563929 0.0000492
UniRef50_G0IBT1: Integral membrane protein 11.552031 0.0000063
UniRef50_Q5WII6: IS605/IS200 family transposase 11.548898 0.0000002
UniRef50_B2GAM0: Enolase 11.542368 0.0000166
UniRef50_Q2S3P1: 30S ribosomal protein S11 11.525881 0.0000135
UniRef50_Q67JV0: 50S ribosomal protein L16 11.503054 0.0000362
UniRef50_A5ZLI2 11.495763 0.0000058
UniRef50_R7JXR0: Transposase IS200-like protein 11.473894 0.0000000
UniRef50_Q8A276: 30S ribosomal protein S20 11.468189 0.0004102
UniRef50_P03886: NADH-ubiquinone oxidoreductase chain 1 -1.528942 0.0070742
UniRef50_Q36644: Cytochrome b (Fragment) -1.530727 0.0052435
UniRef50_P00395: Cytochrome c oxidase subunit 1 -1.531205 0.0001640
UniRef50_A8NTP2: Protein Bm983 -1.547804 0.0037564
UniRef50_H9M4B7: Cytochrome c oxidase subunit 1 -1.559664 0.0003397
UniRef50_X2D544: Cytochrome c oxidase subunit 2 (Fragment) -1.573120 0.0039919
UniRef50_Q35805: Cytochrome b (Fragment) -1.581162 0.0059227
UniRef50_D3DU34: HCG1817947, isoform CRA_a -1.594312 0.0090239
UniRef50_G1Q286 -1.595514 0.0020015
UniRef50_P50656: Cytochrome c oxidase subunit 1 (Fragment) -1.609859 0.0010747
UniRef50_K9LQB2: Cytochrome b (Fragment) -1.619860 0.0048235
UniRef50_Q580R0 -1.631223 0.0002570
UniRef50_G7K0E8 -1.667354 0.0031348
UniRef50_Q8WMI5: Protocadherin beta 3’ (Fragment) -1.677626 0.0094756
UniRef50_P82046: Cytochrome b (Fragment) -1.681069 0.0023857
UniRef50_M4NJR0: Cytochrome b (Fragment) -1.681089 0.0007053
UniRef50_B3UYF0: Cytochrome c oxidase subunit 1 -1.686232 0.0003859
UniRef50_R4PTX6: NADH-ubiquinone oxidoreductase chain 1 (Fragment) -1.703776 0.0010108
UniRef50_F6PXC3: Histone H3 (Fragment) -1.724211 0.0047300
UniRef50_H6BD92: ATP synthase FO subunit 6 (Fragment) -1.729135 0.0025152
UniRef50_H2PLR3 -1.734748 0.0051965
UniRef50_A0EXD5: Cytochrome b (Fragment) -1.755071 0.0014336
UniRef50_P00403: Cytochrome c oxidase subunit 2 -1.776871 0.0041089
UniRef50_D8LAJ2: NADH-ubiquinone oxidoreductase chain 1 (Fragment) -1.783306 0.0012416
UniRef50_P03905: NADH-ubiquinone oxidoreductase chain 4 -1.846289 0.0016293
UniRef50_Q27GW8: NADH-ubiquinone oxidoreductase chain 1 -1.905353 0.0087612
UniRef50_S6IEN3 -1.923147 0.0071962
UniRef50_E1GF92 -1.929251 0.0069511
UniRef50_P03915: NADH-ubiquinone oxidoreductase chain 5 -1.932182 0.0009014
UniRef50_F7I766: Taste receptor type 2 -1.944672 0.0051698
UniRef50_A1XEE7: COX1 (Fragment) -1.954167 0.0006538
UniRef50_P03901: NADH-ubiquinone oxidoreductase chain 4L -2.107066 0.0085070
UniRef50_V4LU59 -2.203396 0.0038071
UniRef50_P0CG47: Polyubiquitin-B -2.252169 0.0072864
UniRef50_UPI0003688413: hypothetical protein -2.429962 0.0058653
UniRef50_W1YJZ2 -2.490203 0.0093057
UniRef50_Q8HXG2: Macaca fascicularis brain cDNA clone: QflA-20071, similar to human ATP synthase 6 (MTATP6), mRNA, RefSeq: NM_173702.1 -2.686430 0.0003784
UniRef50_J9E765 -2.692402 0.0036926
UniRef50_I6LBK7: Cytochrome c oxidase subunit 1 (Fragment) -2.759758 0.0055914
UniRef50_S8BUY4 -3.025371 0.0049229
UniRef50_X1FQ44: Marine sediment metagenome DNA, contig: S03H2_L09072 (Fragment) -3.040754 0.0008733
UniRef50_W1WD51 -3.123316 0.0033155
UniRef50_V8PE67: Tyrosine-protein phosphatase non-receptor type 11 (Fragment) -3.262423 0.0061165
UniRef50_W1V5A8 -3.600586 0.0020817
UniRef50_W5I2W3: Cytochrome c oxidase subunit 3 -4.202874 0.0065131
UniRef50_I1SRR0: NADH dehydrogenase subunit 4 (Fragment) -4.524950 0.0082184
UniRef50_H9FCS1: Mediator of DNA damage checkpoint protein 1 (Fragment) -4.664635 0.0093509
UniRef50_H9FCW3: Mediator of DNA damage checkpoint protein 1 (Fragment) -4.789376 0.0089735
UniRef50_U5Q3P3: Cytochrome c oxidase subunit 2 (Fragment) -5.144596 0.0034020
UniRef50_S9WFI9 -7.807100 0.0001590

Blood vs buccal_euk

Negative fold changes are higher in blood samples while positive are higher in buccal_euk samples.

blse = genes.loc[:, ['Blood_Buccal_euk_FC', 'Blood_Buccal_euk_p']]
blse = blse[blse['Blood_Buccal_euk_p'] < 0.01]
blse = blse.sort_values(by=['Blood_Buccal_euk_FC'], ascending=False).rename(columns={'Blood_Buccal_euk_FC':'Log2 fold change', 'Blood_Buccal_euk_p':'p'})
blse = pd.concat([blse.head(n=50), blse.tail(n=50)])
blse = blse.groupby(by=blse.index, axis=0).mean()
blse.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$blse %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
UniRef50_Q8NHA8: Olfactory receptor 1F12 14.206455 0.0000000
UniRef50_Q6ZRH3 13.568667 0.0000000
UniRef50_Q6Y2K7: LP5852 protein 11.728181 0.0000000
UniRef50_K4AUT0 11.358313 0.0020925
UniRef50_Q18CG9: 50S ribosomal protein L29 10.819685 0.0000001
UniRef50_C9J0J3: Ectonucleoside triphosphate diphosphohydrolase 3 (Fragment) 10.401278 0.0000000
UniRef50_Q99JC0: RRNA promoter binding protein 10.400075 0.0000000
UniRef50_D6R9G5: Kelch-like protein 2 (Fragment) 10.279457 0.0000000
UniRef50_Q0TMQ9: 30S ribosomal protein S14 type Z 10.162296 0.0009675
UniRef50_A4VYJ1 10.067180 0.0019393
UniRef50_P0A2W0: Acyl carrier protein 10.054894 0.0000272
UniRef50_Q836B1: UPF0473 protein EF_1204 9.974626 0.0000055
UniRef50_A4W3M3: 3-hydroxyacyl-[acyl-carrier-protein] dehydratase FabZ 9.769790 0.0000723
UniRef50_F5XS62 9.763733 0.0008645
UniRef50_UPI00029DB511: PREDICTED: E3 ubiquitin-protein ligase Itchy homolog 9.720111 0.0000000
UniRef50_F2QEQ4: Rhodanese-related sulfurtransferase 9.711035 0.0006302
UniRef50_UPI000377D7E9: hypothetical protein, partial 9.694725 0.0000502
UniRef50_I1ZKW3 9.563515 0.0000809
UniRef50_G0ICB9 9.514858 0.0000443
UniRef50_A8RZY1 9.453512 0.0070344
UniRef50_Q67MT5: Peptide chain release factor 3 9.276501 0.0002504
UniRef50_T1Z5N5: 50S ribosomal protein L3 9.236708 0.0003293
UniRef50_Q02419: Protein PhnA 9.125880 0.0000689
UniRef50_Q2FFI7: Adenylosuccinate lyase 9.089428 0.0002281
UniRef50_A1WZJ2: 60 kDa chaperonin 9.049558 0.0002462
UniRef50_G0IBT1: Integral membrane protein 9.018158 0.0000643
UniRef50_A4QMB1: ORF58e 8.988876 0.0000163
UniRef50_Q5WII6: IS605/IS200 family transposase 8.983484 0.0000330
UniRef50_Q5FLZ3: tRNA N6-adenosine threonylcarbamoyltransferase 8.975426 0.0000814
UniRef50_Q97R65: Dihydroorotate dehydrogenase B (NAD(+)), catalytic subunit 8.963234 0.0000471
UniRef50_P67185: Probable transcriptional regulatory protein SAG1645 8.933849 0.0000210
UniRef50_X1NBY2: Marine sediment metagenome DNA, contig: S06H3_S14577 8.925777 0.0000003
UniRef50_P14949: Thioredoxin 8.916874 0.0000713
UniRef50_Q9JTA4: UPF0210 protein NMA1908 8.905626 0.0002581
UniRef50_Q839Y4: Adenylosuccinate synthetase 8.875518 0.0001953
UniRef50_S7YHL0 8.867961 0.0000160
UniRef50_A4VYP1: 30S ribosomal protein S10 8.846283 0.0000723
UniRef50_Q831A8: UDP-N-acetylglucosamine 1-carboxyvinyltransferase 2 8.838642 0.0001587
UniRef50_A8AVF5: Isochorismatase family protein 8.831903 0.0001626
UniRef50_UPI000273D5FE 8.828918 0.0000000
UniRef50_Q6GDQ0: ATP-dependent Clp protease ATP-binding subunit ClpL 8.761473 0.0001248
UniRef50_C1CQE1: UPF0340 protein SPT_0687 8.761093 0.0006395
UniRef50_G7SQ06: Cell wall-associated hydrolase 8.744909 0.0003129
UniRef50_UPI0001D62081 8.733988 0.0000000
UniRef50_P37747: UDP-galactopyranose mutase 8.726289 0.0001058
UniRef50_P90591: PV14 protein 8.698111 0.0000001
UniRef50_Q1C306: Serine/threonine transporter SstT 8.670691 0.0001549
UniRef50_A3CNC4: Rhodanese-like domain protein, putative 8.650163 0.0000002
UniRef50_A0A024DGI8: Membrane protein 8.634631 0.0000937
UniRef50_Q3K3S9: tRNA pseudouridine synthase A 8.615080 0.0002396
UniRef50_I7G8L2: Macaca fascicularis brain cDNA clone: QmoA-11833, similar to human chemokine (C-C motif) ligand 5 (CCL5), mRNA, RefSeq: NM_002985.2 -3.057083 0.0044197
UniRef50_UPI000273D3DB: PREDICTED: histone demethylase UTY-like -3.058105 0.0006568
UniRef50_X1RTY3: Marine sediment metagenome DNA, contig: S12H4_L05131 -3.088100 0.0001320
UniRef50_Q6ZNX4: CDNA FLJ26942 fis, clone RCT07464 -3.088863 0.0000248
UniRef50_Q9BZ60: FKSG63 -3.094164 0.0043170
UniRef50_H2PBI0 -3.194845 0.0050537
UniRef50_Q8NF05: FLJ00399 protein (Fragment) -3.195422 0.0017753
UniRef50_G3RYZ2 -3.202026 0.0017820
UniRef50_Q1W209: Embryonic stem cell-related gene protein -3.298909 0.0040440
UniRef50_UPI00045E37E4: PREDICTED: protein GVQW1-like, partial -3.521571 0.0021273
UniRef50_F6TJB2 -3.599640 0.0073896
UniRef50_W1YJZ2 -3.669211 0.0000867
UniRef50_F7GRN4 -3.714752 0.0009200
UniRef50_Q6ZUG4: cDNA FLJ43741 fis, clone TESTI2017727 -3.918750 0.0026472
UniRef50_Q6FG63: FLJ10385 protein -4.086130 0.0029433
UniRef50_W1WD51 -4.129838 0.0000402
UniRef50_O95662: KpnI repetitive sequence (T-betaG41) 3kb downstream of beta-globin protein (Fragment) -4.204094 0.0015056
UniRef50_Q8WY51: HC6 -4.225498 0.0064566
UniRef50_S8BUY4 -4.259124 0.0000409
UniRef50_F6S8D7 -4.301492 0.0058294
UniRef50_Q8N329: C3orf64 protein -4.310691 0.0051846
UniRef50_I7G4S6: Macaca fascicularis brain cDNA clone: QbsB-10410, similar to human hypothetical protein FLJ20457 (FLJ20457), mRNA, RefSeq: NM_017832.2 -4.419854 0.0067237
UniRef50_F7GW59 -4.437119 0.0000852
UniRef50_Q8N287: cDNA FLJ33669 fis, clone BRAMY2028740 -4.463438 0.0061385
UniRef50_W1V5A8 -4.529869 0.0000501
UniRef50_Q6ZR97: cDNA FLJ46534 fis, clone THYMU3037052, weakly similar to Homo sapiens HIV TAT specific factor 1 (HTATSF1) -4.612080 0.0042394
UniRef50_UPI000292A7A2 -4.755766 0.0050788
UniRef50_G2HH39: Zinc finger protein 580 -4.874037 0.0066219
UniRef50_F7HM41 -5.239724 0.0041429
UniRef50_G9L4Y9 -5.290411 0.0077388
UniRef50_F7ERF9 -5.464497 0.0084036
UniRef50_UPI0001AF4819: hypothetical protein -5.485818 0.0013148
UniRef50_I7G402: Macaca fascicularis brain cDNA clone: QbsA-12013, similar to human glutamate receptor, metabotropic 6 (GRM6), mRNA, RefSeq: NM_000843.2 -5.510659 0.0026030
UniRef50_G7PG11 -5.567385 0.0067932
UniRef50_Q96LU0: CDNA FLJ25069 fis, clone CBL05145 -5.625021 0.0047341
UniRef50_Q9UI61: PRO0470 -5.839999 0.0088171
UniRef50_Q9GMT0 -5.905245 0.0092684
UniRef50_Q6ZNZ6: CDNA FLJ26821 fis, clone PRS06629 -5.952807 0.0070262
UniRef50_M3Z9H5 -6.138953 0.0025770
UniRef50_F7CR38 -6.162863 0.0077810
UniRef50_X6R7Y7: Intraflagellar transport protein 25 homolog -6.496334 0.0059577
UniRef50_K7EJ15: Ankyrin repeat domain-containing protein 20A3 (Fragment) -6.986405 0.0002879
UniRef50_F6ZQ59 -7.076242 0.0070378
UniRef50_Q4XHH2 -7.100608 0.0066861
UniRef50_Q8N210: Retbindin -7.152420 0.0042904
UniRef50_H7BYS4 -7.256297 0.0030867
UniRef50_Q4Y9P0 -7.638074 0.0045271
UniRef50_Q8WTZ3: Zinc finger protein ENSP00000375192 -8.305599 0.0091100
UniRef50_S9WFI9 -9.303851 0.0000001
UniRef50_S4L0C2 -9.774147 0.0042497

HUMAnN3

genes = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann3/genes.csv', header=0, index_col=0)

Blood vs saliva

Negative fold changes are higher in blood samples while positive are higher in buccal_euk samples.

bs3 = genes.loc[:, ['Saliva_Blood_FC', 'Saliva_Blood_p']]
bs3 = bs3[bs3['Saliva_Blood_p'] < 0.01]
bs3 = bs3.sort_values(by=['Saliva_Blood_FC'], ascending=False).rename(columns={'Saliva_Blood_FC':'Log2 fold change', 'Saliva_Blood_p':'p'})
bs3 = pd.concat([bs3.head(n=50), bs3.tail(n=50)])
bs3 = bs3.groupby(by=bs3.index, axis=0).mean()
bs3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bs3 %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
UniRef90_A0A059UGD1 7.822147 0.0014155
UniRef50_Q5UGI3 5.981764 0.0092449
UniRef50_A7SYF9 4.726831 0.0007634
UniRef50_UPI000D0C899F 4.688802 0.0069116
UniRef50_A0A1A9AHF0 4.229893 0.0036662
UniRef50_A0A0D9S5U8 4.186541 0.0000897
UniRef90_A0A0B7AG49 4.082921 0.0011859
UniRef90_A0A1A9AHF0 3.974740 0.0022994
UniRef90_UPI0005298A3D 3.961044 0.0032579
UniRef50_R4GNJ2: 60S ribosomal protein L5 3.947971 0.0013257
UniRef90_UPI000CEF8CC7 3.860900 0.0082239
UniRef90_UPI000D0A1051 3.849842 0.0014695
UniRef50_F6QPS0: Cell division cycle protein 27 homolog 3.832752 0.0082278
UniRef90_F6TZJ6 3.817980 0.0016388
UniRef90_A0A2V2KWK9 3.769290 0.0000094
UniRef50_P62975: Ubiquitin 3.740375 0.0039747
UniRef50_A0A084WT89 3.740220 0.0039688
UniRef50_A0A0A6YW67 3.740220 0.0039688
UniRef50_UPI000D671FA2 3.736353 0.0039639
UniRef90_F7DPM2 3.714813 0.0000831
UniRef90_A0A2U2VZN6 3.708737 0.0046169
UniRef50_UPI0008F51C75 3.703223 0.0013197
UniRef90_H2NHX1 3.685370 0.0003670
UniRef50_A0A2U2VZN6 3.682427 0.0048597
UniRef90_F7DUA9 3.649735 0.0002683
UniRef50_A0A2V2KWK9 3.614798 0.0000136
UniRef90_F7A9Z7 3.604416 0.0002045
UniRef90_A0A2I3T9X4 3.586077 0.0017932
UniRef90_A0A2U2VXH9 3.584093 0.0007692
UniRef50_F7DUA9 3.571522 0.0004647
UniRef50_A0A023AVK8: Putative endonuclease/reverse transcriptase 3.481009 0.0016895
UniRef90_UPI000D09E8B4 3.460189 0.0022208
UniRef90_A0A2U2VWN7 3.455927 0.0076869
UniRef50_G3SIN0 3.444951 0.0014334
UniRef90_K0Z2P5 3.426548 0.0026335
UniRef90_A0A2D4PUW7 3.422458 0.0003262
UniRef90_A0A2R8MFL8 3.415982 0.0001580
UniRef90_A0A2R8Z5V9 3.414382 0.0022742
UniRef90_A0A2I3SFR4 3.404313 0.0079436
UniRef90_F2VPV5 3.383545 0.0018872
UniRef90_G3S285 3.381721 0.0013277
UniRef50_A0A2U2VXH9 3.357922 0.0004359
UniRef90_S9X3V4 3.350696 0.0036189
UniRef90_H2PTC1 3.348218 0.0001353
UniRef50_A0A2U2VWN7 3.342515 0.0082517
UniRef90_G7NGG9 3.340707 0.0001180
UniRef50_UPI000B7B8E00 3.332866 0.0020228
UniRef90_Q99878 3.328294 0.0034219
UniRef50_UPI0008F4FD63 3.323762 0.0000537
UniRef90_Q4G1Z1 3.318735 0.0023210
UniRef90_A0A134BX12 -13.225572 0.0000000
UniRef90_C9MQ90 -13.229306 0.0000000
UniRef90_D3I8L8 -13.245145 0.0000000
UniRef90_I0GNP0 -13.259245 0.0000008
UniRef90_A7B8Q4 -13.271813 0.0000000
UniRef90_G0ICA3 -13.295380 0.0000002
UniRef90_A0A2D3LM49 -13.369449 0.0000000
UniRef90_F8DKM2 -13.372066 0.0000002
UniRef90_A0A2C6B6C9 -13.374933 0.0012569
UniRef90_C9N1P8 -13.387055 0.0000002
UniRef90_G1WBW8 -13.387095 0.0000000
UniRef90_D9RRC6 -13.421057 0.0000002
UniRef90_G5GL71 -13.433467 0.0002349
UniRef50_A0A2J8WD87 -13.497056 0.0000000
UniRef90_X5NU12 -13.504789 0.0000000
UniRef90_A7B8Q3 -13.583736 0.0000000
UniRef90_U2J5N0 -13.604911 0.0000000
UniRef90_A0A139P8V9 -13.685165 0.0000000
UniRef90_A7B8Q2 -13.692794 0.0000000
UniRef90_K7ESR6 -13.698188 0.0000000
UniRef90_A0A134C1V4 -13.706270 0.0000000
UniRef90_G3RWI0 -13.715008 0.0000000
UniRef90_Q8CWN7 -13.731959 0.0000000
UniRef90_E4LCZ2 -13.743743 0.0000000
UniRef90_D4TVU3 -13.746518 0.0000003
UniRef90_A0A0K2J3Z9 -13.803028 0.0000041
UniRef90_K0ZJH0 -13.821083 0.0000000
UniRef90_D3IDX3 -13.840488 0.0001256
UniRef90_J7TNS4 -13.858979 0.0000000
UniRef90_V8BHM6 -13.946408 0.0000000
UniRef50_Q9HC73-2: Isoform 2 of Cytokine receptor-like factor 2 -14.139921 0.0000000
UniRef90_A0A2D3LLY2 -14.383255 0.0000009
UniRef90_Q6TDT1 -14.420960 0.0000000
UniRef50_Q6TDT1: Protein transactivated by hepatitis B virus E antigen -14.482286 0.0000000
UniRef90_A0A2I2YPF9 -14.513736 0.0000000
UniRef90_Q8RHH9 -14.516243 0.0011083
UniRef90_A0A134A205 -14.624728 0.0000001
UniRef90_U2Z4S8 -14.773062 0.0000000
UniRef90_X6PVX8 -14.810247 0.0000000
UniRef90_R5FPP1 -14.840933 0.0000000
UniRef90_J7TUV6 -14.890002 0.0000000
UniRef50_G3RWI0 -14.990248 0.0000000
UniRef90_A0A2I1TTC0 -15.008420 0.0000000
UniRef90_U2ZPB1 -15.237430 0.0000000
UniRef90_C4V1U8 -15.399598 0.0000001
UniRef90_J7SHX6 -15.431610 0.0000000
UniRef90_C9MYW9 -15.964145 0.0000000
UniRef90_A0A134C1F2 -15.999313 0.0000002
UniRef90_Q5LXM5 -16.079023 0.0000000
UniRef90_V8M0W9 -17.605024 0.0000000

Blood vs buccal

Negative fold changes are higher in buccal samples while positive are higher in blood samples.

bb3 = genes.loc[:, ['Buccal_Blood_FC', 'Buccal_Blood_p']]
bb3 = bb3[bb3['Buccal_Blood_p'] < 0.01]
bb3 = bb3.sort_values(by=['Buccal_Blood_FC'], ascending=False).rename(columns={'Buccal_Blood_FC':'Log2 fold change', 'Buccal_Blood_p':'p'})
bb3 = pd.concat([bb3.head(n=50), bb3.tail(n=50)])
bb3 = bb3.groupby(by=bb3.index, axis=0).mean()
bb3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bb3 %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
UniRef50_UPI000D0C899F 10.129773 0.0019113
UniRef90_A0A059UGD1 7.436453 0.0067404
UniRef90_B0X777 2.575430 0.0052864
UniRef90_A0A2M4CW35 2.444204 0.0044833
UniRef90_R4G555 2.444032 0.0044835
UniRef90_D2HWI9 2.443863 0.0044837
UniRef90_P84040 2.443863 0.0044837
UniRef90_J9NRV0 2.443863 0.0044837
UniRef90_P35059 2.326871 0.0048374
UniRef90_A0A096MDK0 2.313180 0.0048180
UniRef90_H0ZGB0 2.312709 0.0048194
UniRef90_U3I341 2.312378 0.0048204
UniRef90_W5QFQ2 2.312378 0.0048204
UniRef90_W5PHH3 2.312378 0.0048204
UniRef90_G3U9V5 2.312205 0.0048210
UniRef90_X1WGM1 2.312028 0.0048215
UniRef90_UPI0003C8CAF6 2.312028 0.0048215
UniRef90_G3U1Z3 2.312028 0.0048215
UniRef90_G3U8T3 2.312028 0.0048215
UniRef90_G3UKU6 2.312028 0.0048215
UniRef90_UPI00073339EF 2.312028 0.0048215
UniRef90_P62805 2.311852 0.0048221
UniRef50_F6X6A6 2.027367 0.0022089
UniRef90_P61247 2.013804 0.0008346
UniRef90_UPI00075FE3C9 1.992473 0.0093257
UniRef90_A0A2U2VXH9 1.983799 0.0022542
UniRef90_H2NHX1 1.954676 0.0002081
UniRef90_X1V349 1.870192 0.0081726
UniRef90_D6RAT0 1.862069 0.0014769
UniRef90_A0A2R8MFL8 1.811106 0.0001598
UniRef50_A0A2U2VXH9 1.802437 0.0017445
UniRef90_A0A1U7RBC3 1.801536 0.0013585
UniRef90_Q4GWZ2 1.791779 0.0056083
UniRef90_Q3TXR5 1.791113 0.0050731
UniRef90_P14206 1.778647 0.0033932
UniRef90_G1N211 1.774490 0.0024735
UniRef90_A0A091FAX3 1.774434 0.0024739
UniRef90_UPI000C313812 1.765998 0.0001919
UniRef50_UPI0008F51C75 1.712422 0.0004572
UniRef90_A0A2I0MJF5 1.696810 0.0001676
UniRef90_A0A2I3T9X4 1.690091 0.0000037
UniRef50_A0A286ZVP5 1.687382 0.0007155
UniRef50_G3SIN0 1.677557 0.0005188
UniRef50_A0A2S2P7V1 1.672817 0.0043278
UniRef90_H2PTC1 1.666913 0.0011545
UniRef90_UPI000D0A1051 1.654833 0.0004163
UniRef90_UPI000D09E8B4 1.625457 0.0029957
UniRef90_Q7TP33 1.599129 0.0013668
UniRef50_A0A2V2KWK9 1.595489 0.0005246
UniRef90_A0A1D5RDK4 1.588584 0.0050427
UniRef90_Q9F0R4 -13.037667 0.0000163
UniRef90_A0A0T8AUD9 -13.047165 0.0000107
UniRef90_A0A0E2UGZ4 -13.058239 0.0000194
UniRef90_A3CK83 -13.063269 0.0001287
UniRef90_P66717 -13.069189 0.0000311
UniRef90_E6KK04 -13.081844 0.0000318
UniRef90_B5E3W1 -13.082211 0.0000138
UniRef90_Q04J38 -13.084233 0.0001112
UniRef90_Q04JK8 -13.088419 0.0000003
UniRef90_C0MBU0 -13.107533 0.0000337
UniRef90_A0A139RMK4 -13.115100 0.0000295
UniRef90_Q97TC4 -13.123406 0.0000813
UniRef90_Q04JE2 -13.123461 0.0000036
UniRef90_P59186 -13.139954 0.0000272
UniRef90_Q04HZ9 -13.140601 0.0000210
UniRef90_I1ZPB5 -13.152555 0.0000212
UniRef90_Q8DS25 -13.169243 0.0000566
UniRef90_A0A139QR72 -13.178519 0.0000497
UniRef90_P66648 -13.188761 0.0000276
UniRef90_A0A0D0ZIG5 -13.223656 0.0000044
UniRef90_A8AZM0 -13.240020 0.0000192
UniRef90_A4VYP5 -13.244254 0.0000051
UniRef90_A0A139PM50 -13.248708 0.0000644
UniRef50_A0A0D0ZIG5 -13.254257 0.0000045
UniRef90_A0A2Z5TND2 -13.255398 0.0000087
UniRef90_S7XR44 -13.274436 0.0000017
UniRef90_P66394 -13.288386 0.0000455
UniRef90_Q8DS23 -13.297321 0.0000254
UniRef90_A0A139QR65 -13.299838 0.0000006
UniRef90_A3CRA2 -13.339479 0.0000476
UniRef90_Q8DS22 -13.355691 0.0000032
UniRef90_Q8CZ89 -13.374704 0.0000373
UniRef90_Q04IS6 -13.503939 0.0000237
UniRef90_A8AZL7 -13.509927 0.0000326
UniRef90_P48853 -13.531313 0.0000019
UniRef90_P66634 -13.531343 0.0000420
UniRef90_A8AZD5 -13.544545 0.0000219
UniRef90_P66137 -13.580103 0.0000133
UniRef90_B4U516 -13.648142 0.0000071
UniRef90_U5P1K2 -13.716305 0.0000127
UniRef90_V8IAL5 -13.720273 0.0000123
UniRef90_Q97T34 -13.814773 0.0000025
UniRef90_H2P484 -13.911094 0.0000000
UniRef90_A5MC70 -14.398737 0.0000012
UniRef90_Q04ML8 -14.538773 0.0000068
UniRef90_E6KLV2 -14.639250 0.0000185
UniRef90_K0ZJH0 -14.689870 0.0000000
UniRef90_Q3K3Q0 -14.715563 0.0000019
UniRef90_A0A2I1YXE0 -15.202859 0.0001122
UniRef90_U2ZPB1 -17.049232 0.0000124

Saliva vs buccal

Negative fold changes are higher in saliva samples while positive are higher in buccal samples.

sb3 = genes.loc[:, ['Saliva_Buccal_FC', 'Saliva_Buccal_p']]
sb3 = sb3[sb3['Saliva_Buccal_p'] < 0.01]
sb3 = sb3.sort_values(by=['Saliva_Buccal_FC'], ascending=False).rename(columns={'Saliva_Buccal_FC':'Log2 fold change', 'Saliva_Buccal_p':'p'})
sb3 = pd.concat([sb3.head(n=50), sb3.tail(n=50)])
sb3 = sb3.groupby(by=sb3.index, axis=0).mean()
sb3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$sb3 %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
UniRef50_A0A081Q6T6 8.862121 0.0051140
UniRef90_A0A139RCU2 8.693669 0.0049375
UniRef50_W1YUQ6: Iron-sulfur cluster binding protein (Fragment) 8.582003 0.0003729
UniRef90_E4LB29 8.323290 0.0000000
UniRef50_F9PVE3: Gram-positive signal peptide protein, YSIRK family 8.231943 0.0047906
UniRef90_A8AXT8 8.212688 0.0057492
UniRef90_A0A1F1IWP7 8.112856 0.0046921
UniRef90_W1WE06 8.031932 0.0007598
UniRef90_A0A1X1I534 7.837456 0.0097395
UniRef50_W1XVI8: NLPA lipoprotein (Fragment) 7.799328 0.0001882
UniRef90_A0A0T8ACY0 7.686068 0.0000941
UniRef90_W1V072 7.482241 0.0006265
UniRef90_A0A081R2B4 7.325415 0.0082141
UniRef90_A0A075B7C8 7.179019 0.0007298
UniRef90_Q708Q5 7.141482 0.0047735
UniRef50_W1Y3H1: Conserved carboxylase region (Fragment) 7.116781 0.0000299
UniRef50_E7N911 7.110605 0.0004794
UniRef90_E7N911 7.110605 0.0000077
UniRef50_A0A0T9FSR6 7.100827 0.0028679
UniRef50_E3GZN7 6.996615 0.0008823
UniRef90_E3GZN7 6.996615 0.0008823
UniRef90_D6KK22 6.944447 0.0013294
UniRef90_W1YLT1 6.934864 0.0000365
UniRef90_A0A139PTT3 6.859800 0.0023204
UniRef50_A0A0U0K5K2 6.751190 0.0004157
UniRef90_F0IKB1 6.732742 0.0024579
UniRef50_A0A139PIN8 6.620595 0.0012891
UniRef50_E3H3Y7 6.497254 0.0016452
UniRef90_E3H3Y7 6.497254 0.0015394
UniRef90_U7V1B2 6.481539 0.0084317
UniRef90_Q4QN10 6.475568 0.0014609
UniRef90_F7GBC1 6.350249 0.0014280
UniRef50_A0A0T9GUE3 6.341451 0.0020606
UniRef50_A0A0U0L3I1 6.252453 0.0001216
UniRef50_A0A0U0LBU0 6.241439 0.0011098
UniRef50_O54422 6.236804 0.0017761
UniRef50_UPI000175411A 6.232737 0.0000700
UniRef90_A0A0T8WBY0 6.192122 0.0005151
UniRef50_Q708Q5 6.178868 0.0052554
UniRef90_A0A150NGX6 6.032115 0.0019991
UniRef50_F7GBC1 5.976026 0.0020113
UniRef90_A0A0Y1ZBZ4 5.931165 0.0000145
UniRef90_A0A0Y2AQK8 5.915247 0.0009299
UniRef90_A5M8D7 5.914862 0.0003115
UniRef50_A5M8D7: Peptidase, M20/M25/M40 family protein 5.914862 0.0003115
UniRef50_A0A139Q0B7 5.907665 0.0048237
UniRef90_F5VX89 5.891731 0.0044566
UniRef50_F9Q3H8: Competence-induced protein Ccs4 family protein 5.771735 0.0082617
UniRef90_A0A0F2CH56 5.766957 0.0069526
UniRef90_A0A0T8AE83 5.763812 0.0023496
UniRef90_A6LEG9 -12.524120 0.0012097
UniRef90_F0F5Q9 -12.555641 0.0000000
UniRef90_A0A2D3LNT5 -12.562524 0.0000001
UniRef90_U2KD45 -12.566643 0.0000000
UniRef90_U2K4J4 -12.602537 0.0003876
UniRef90_L9PWI7 -12.628225 0.0007691
UniRef90_F9M3J5 -12.655427 0.0000767
UniRef90_U2K4R3 -12.670375 0.0000000
UniRef90_F0H8Q7 -12.678580 0.0000001
UniRef90_C4V316 -12.706970 0.0000395
UniRef90_A0A096C1X8 -12.727731 0.0013362
UniRef90_F9DLF8 -12.741067 0.0000000
UniRef90_A0A1B1I5Z7 -12.743715 0.0009261
UniRef90_D5EYY8 -12.777063 0.0011725
UniRef90_X4QQ61 -12.797472 0.0000010
UniRef90_C9N1P9 -12.798616 0.0000003
UniRef90_A0A0A2DYJ5 -12.800275 0.0010219
UniRef50_Q5T5H1: Alpha-endosulfine -12.804101 0.0000000
UniRef90_D9RWQ6 -12.828632 0.0018162
UniRef90_U2INL1 -12.833208 0.0000536
UniRef90_A0A2K9HB98 -12.843978 0.0001843
UniRef90_A0A1B1I6C5 -12.858335 0.0000000
UniRef90_G1VHD9 -12.937039 0.0000003
UniRef90_G1VAR6 -12.960851 0.0000002
UniRef90_A0A0K1NIV2 -12.972303 0.0000000
UniRef90_U2JAK3 -13.035604 0.0000026
UniRef90_A0A2G9ICB3 -13.072553 0.0004154
UniRef90_D4S901 -13.113845 0.0000006
UniRef90_U2KIJ5 -13.127126 0.0000000
UniRef90_A0A2D3L930 -13.129930 0.0000001
UniRef90_D4CUG6 -13.169832 0.0068414
UniRef90_A0A134A7E7 -13.209713 0.0000134
UniRef90_A0A2D3L5P2 -13.224481 0.0017631
UniRef90_C9MQ90 -13.229306 0.0000000
UniRef90_I0GNP0 -13.259245 0.0000008
UniRef90_A0A2D3LM49 -13.369449 0.0000000
UniRef90_A0A2C6B6C9 -13.374933 0.0073775
UniRef90_G1WBW8 -13.387095 0.0000000
UniRef50_A0A2J8WD87 -13.497056 0.0000000
UniRef90_U2J5N0 -13.604911 0.0000000
UniRef90_K7ESR6 -13.698188 0.0000000
UniRef90_A0A134C1V4 -13.706270 0.0000000
UniRef90_A0A0K2J3Z9 -13.803028 0.0000041
UniRef90_A0A2D3LLY2 -14.383255 0.0000009
UniRef90_A0A2I2YPF9 -14.513736 0.0000000
UniRef90_Q8RHH9 -14.516243 0.0011083
UniRef90_A0A134A205 -14.624728 0.0000001
UniRef90_X6PVX8 -14.810247 0.0008018
UniRef90_R5FPP1 -14.840933 0.0000000
UniRef90_C4V1U8 -15.399598 0.0000001

Saliva vs saliva_euk

Negative fold changes are higher in saliva samples while positive are higher in saliva_euk samples.

ss3 = genes.loc[:, ['Saliva_Saliva_euk_FC', 'Saliva_Saliva_euk_p']]
ss3 = ss3[ss3['Saliva_Saliva_euk_p'] < 0.01]
ss3 = ss3.sort_values(by=['Saliva_Saliva_euk_FC'], ascending=False).rename(columns={'Saliva_Saliva_euk_FC':'Log2 fold change', 'Saliva_Saliva_euk_p':'p'})
ss3 = pd.concat([ss3.head(n=50), ss3.tail(n=50)])
ss3 = ss3.groupby(by=ss3.index, axis=0).mean()
ss3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$ss3 %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
UniRef90_A0A060Z7W3 8.282348 0.0003327
UniRef90_A0A378VVT4 7.664048 0.0099090
UniRef90_A0A378VVN6 7.584239 0.0027202
UniRef50_J3QRX4: Coordinator of PRMT5 and differentiation stimulator 6.821069 0.0063040
UniRef90_UPI000011108B 6.543879 0.0009548
UniRef50_UPI000011108B 6.345446 0.0010123
UniRef50_A0A127SR39 6.073086 0.0000011
UniRef90_G8FL39 6.001454 0.0017905
UniRef50_G8FL39: Mitochondrial glutaryl-Coenzyme A dehydrogenase (Fragment) 6.001454 0.0017905
UniRef90_UPI0004F08662 5.864100 0.0007430
UniRef90_F7EL88 5.482098 0.0003288
UniRef90_UPI00092A8639 5.435772 0.0000039
UniRef90_Q2PJC1 5.248742 0.0070655
UniRef90_A0A182PRM8 5.098600 0.0072978
UniRef50_A0A087X2D0 3.751685 0.0003786
UniRef50_A0A179D271 2.986443 0.0017260
UniRef50_P02301 2.732590 0.0021296
UniRef90_H2PKK1 2.577370 0.0014792
UniRef90_A0A0Q0XIY8 2.531513 0.0074443
UniRef90_A0A2D2APV3 2.488688 0.0081355
UniRef90_R0KIL0 2.479330 0.0048823
UniRef50_P84243 2.387499 0.0036276
UniRef50_Q9U281 2.325302 0.0070651
UniRef90_A0A2J8RFQ3 2.228360 0.0020223
UniRef90_G7NWY1 2.219460 0.0061305
UniRef90_P68036-3 2.208144 0.0070047
UniRef90_G7N124 2.201553 0.0043911
UniRef90_F7DGC8 2.193363 0.0026905
UniRef90_I7GP34 2.184647 0.0032542
UniRef50_F6XQ29: Histone H3 2.177236 0.0031249
UniRef50_A0A182JX25 2.170923 0.0026493
UniRef50_A0A2J8IJJ7 2.150553 0.0098009
UniRef90_V9H061 2.051102 0.0078702
UniRef90_A0A2U3XFV7 2.039427 0.0050353
UniRef90_A0A2R8Z8D0 2.028121 0.0004405
UniRef90_F7BWE8 1.992781 0.0072666
UniRef50_X1QTZ0: Marine sediment metagenome DNA, contig: S12H4_C02507 1.987414 0.0079446
UniRef50_A0A2U3XFV7 1.968830 0.0038186
UniRef50_V9H061: Unnamed HERV-H protein (Fragment) 1.947316 0.0060155
UniRef50_G5E9R0: Actin, cytoplasmic 1 1.910023 0.0060873
UniRef50_I7GP34: Macaca fascicularis brain cDNA clone: QflA-23777, similar to human hypothetical protein MGC33637 (MGC33637), mRNA, RefSeq: NM_152596.2 1.906366 0.0036016
UniRef50_A0A1D5PY49 1.887344 0.0075014
UniRef50_B8A6G2 1.886034 0.0065951
UniRef90_A0A2R8M9T5 1.882532 0.0045321
UniRef50_S9XUN5 1.876732 0.0060354
UniRef50_J3KT65 1.858143 0.0068394
UniRef90_F7CBQ1 1.840675 0.0020881
UniRef50_A0A0D3RVU0 1.833649 0.0023064
UniRef90_A0A2K5CZG7 1.827527 0.0061136
UniRef90_UPI0000E260C0 1.813880 0.0043698
UniRef90_UPI000D0C7B1B -9.849761 0.0000000
UniRef90_S7X8Y5 -9.864995 0.0041921
UniRef90_D5RFR1 -9.891940 0.0001036
UniRef90_Q7P3Y9 -9.904060 0.0003434
UniRef90_D5RFA4 -9.932169 0.0000540
UniRef90_X8HAW9 -9.945829 0.0077347
UniRef90_C9MMH6 -9.983416 0.0094733
UniRef90_A0A099BXI6 -9.985410 0.0090131
UniRef90_Q5F686 -9.993084 0.0081696
UniRef90_U7UXP7 -10.003804 0.0001958
UniRef90_F8HCU5 -10.017341 0.0007747
UniRef90_I2NQ87 -10.050622 0.0067671
UniRef90_I2NMZ8 -10.079234 0.0001723
UniRef90_Q8RHA6 -10.094057 0.0035789
UniRef90_I1ZKR1 -10.115973 0.0000633
UniRef90_J2Z9J7 -10.132112 0.0061563
UniRef90_F9PEW5 -10.133380 0.0000032
UniRef90_A0A3E4U7T2 -10.167806 0.0000175
UniRef50_G7N3R4 -10.201602 0.0000011
UniRef90_C8WAE2 -10.218783 0.0087937
UniRef90_E3CBB6 -10.259544 0.0001214
UniRef50_UPI000D0C7B1B -10.259927 0.0000000
UniRef90_E6KQV5 -10.261484 0.0086967
UniRef90_U1SCL3 -10.268713 0.0081649
UniRef90_F0FAA7 -10.336859 0.0091603
UniRef90_E8K809 -10.489002 0.0019604
UniRef90_E8K637 -10.493244 0.0000178
UniRef90_Q8RF81 -10.523584 0.0017007
UniRef90_W1VI75 -10.547989 0.0000033
UniRef90_E3CG68 -10.551141 0.0000978
UniRef90_C9N200 -10.581499 0.0064119
UniRef90_V8BDC1 -10.633253 0.0000030
UniRef90_J5HE19 -10.650225 0.0025169
UniRef90_Q8RIG0 -10.670150 0.0002202
UniRef90_UPI0002ADB0C8 -10.813543 0.0000000
UniRef90_U7UYD4 -10.822357 0.0002397
UniRef90_A0A2X0U125 -10.923687 0.0000005
UniRef90_F2QBW8 -11.283063 0.0000006
UniRef90_F2CGD2 -11.460475 0.0014440
UniRef90_I0Q7R4 -11.559095 0.0000005
UniRef90_A0A2R8MV81 -11.712899 0.0000000
UniRef90_Q8RDX9 -11.818368 0.0003052
UniRef90_E8K3H6 -11.899789 0.0000204
UniRef90_E6KT68 -11.996792 0.0001027
UniRef90_A0A2K5XWR8 -12.061613 0.0000000
UniRef90_D4S901 -13.113845 0.0062068
UniRef90_Q97R59 -13.172387 0.0000854
UniRef90_G0ICA3 -13.295380 0.0003260
UniRef90_Q8CWN7 -13.731959 0.0000000
UniRef90_K0ZJH0 -13.821083 0.0000000

Buccal vs buccal_euk

Negative fold changes are higher in buccal samples while positive are higher in buccal_euk samples.

bbe3 = genes.loc[:, ['Buccal_Buccal_euk_FC', 'Buccal_Buccal_euk_p']]
bbe3 = bbe3[bbe3['Buccal_Buccal_euk_p'] < 0.01]
bbe3 = bbe3.sort_values(by=['Buccal_Buccal_euk_FC'], ascending=False).rename(columns={'Buccal_Buccal_euk_FC':'Log2 fold change', 'Buccal_Buccal_euk_p':'p'})
bbe3 = pd.concat([bbe3.head(n=50), bbe3.tail(n=50)])
bbe3 = bbe3.groupby(by=bbe3.index, axis=0).mean()
bbe3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bbe3 %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
UniRef50_K7EML0: Pancreatic hormone 7.0310054 0.0000004
UniRef90_M0R117 5.5011850 0.0059983
UniRef90_A0A2P5EZ99 3.9890821 0.0062551
UniRef90_A0A200R7A7 3.2039160 0.0022188
UniRef90_A0A2H5PBU6 2.8736481 0.0077219
UniRef90_G3HPV7 2.0364489 0.0066518
UniRef90_A0A1D5QXG0 1.6614831 0.0089477
UniRef90_UPI0000481B5D 1.3253055 0.0075491
UniRef90_UPI000689AF8D -0.7216742 0.0092929
UniRef90_S5G7D6 -0.8770696 0.0020936
UniRef90_A0A1X3K4L0 -0.9239208 0.0090500
UniRef50_S5G7D6: Reverse transcriptase (Fragment) -0.9485544 0.0024463
UniRef90_A0A2I3HL55 -1.1146873 0.0095099
UniRef90_U3J7Y9 -1.2395352 0.0088281
UniRef90_UPI00071A5F49 -1.2396464 0.0079720
UniRef90_A0A0W0F566 -1.3267132 0.0017491
UniRef90_A0A226P011 -1.3338100 0.0022315
UniRef90_A0A1A6FWH6 -1.3420823 0.0017333
UniRef90_A0A3A9SZX0 -1.4347887 0.0041514
UniRef90_A0A0D9RN11 -1.4398683 0.0084938
UniRef90_A0A2D9B6U5 -1.4889206 0.0004896
UniRef90_A0A164U839 -1.5027909 0.0037211
UniRef90_UPI00077D3835 -1.5080786 0.0004962
UniRef90_UPI000904C9AB -1.5308643 0.0027494
UniRef90_L5JW24 -1.5586683 0.0003841
UniRef90_UPI0008FA61AA -1.5594597 0.0025433
UniRef90_UPI000C9E384A -1.6019248 0.0025533
UniRef90_UPI000643CC37 -1.6127391 0.0025572
UniRef90_A0A218V149 -1.6310966 0.0045602
UniRef90_T1JAH4 -1.6456789 0.0025722
UniRef90_A0A2K5P9A7 -1.6551043 0.0001991
UniRef50_P00395: Cytochrome c oxidase subunit 1 -1.6793171 0.0029821
UniRef90_A0A0B1SRV1 -1.7092819 0.0035622
UniRef50_P00414: Cytochrome c oxidase subunit 3 -1.7130793 0.0027691
UniRef90_A0A3B4CZH6 -1.7158017 0.0003610
UniRef90_A0A3B4WM40 -1.7254403 0.0033884
UniRef90_A0A1I7VUF0 -1.7645164 0.0002052
UniRef50_P03886: NADH-ubiquinone oxidoreductase chain 1 -1.7747460 0.0030658
UniRef90_A0A0N6W1Y9 -1.7904823 0.0036262
UniRef90_UPI000763751A -1.8263882 0.0024975
UniRef90_A0A0K0JA80 -1.8470175 0.0001500
UniRef90_F0YMZ5 -1.8522387 0.0014339
UniRef90_Q8HNQ2 -1.8575767 0.0064046
UniRef90_A7S708 -1.8682428 0.0013884
UniRef90_UPI000CE634A6 -1.8721491 0.0006919
UniRef90_UPI0007B848D5 -1.9045456 0.0012979
UniRef90_UPI0008FA2BF1 -1.9189624 0.0021113
UniRef90_A0A0N7BB96 -1.9648185 0.0005051
UniRef90_A0A2D4BWI6 -1.9757272 0.0012930
UniRef90_A0A3B4X1B7 -1.9803506 0.0013545
UniRef50_A0A343H6K7 -9.8148188 0.0000000
UniRef90_D8UR72 -9.8382223 0.0000034
UniRef90_A0A2K5Q2B0 -9.8683093 0.0099186
UniRef50_Q96LU0: CDNA FLJ25069 fis, clone CBL05145 -9.8742796 0.0000000
UniRef90_A0A1N5F4A1 -9.9499290 0.0000001
UniRef90_F2QFF2 -9.9505363 0.0000002
UniRef90_F9Q035 -9.9659023 0.0000103
UniRef90_F0IU43 -9.9688900 0.0000010
UniRef90_W1XIL2 -9.9729787 0.0058006
UniRef90_F6QR35 -9.9777634 0.0000000
UniRef90_U7V619 -9.9961323 0.0000000
UniRef90_F9LUN9 -10.0050446 0.0000001
UniRef90_F3SL59 -10.0249965 0.0000021
UniRef90_M0QXE4 -10.0253680 0.0000000
UniRef90_UPI00045E37E4 -10.0579599 0.0000006
UniRef90_A8AZT8 -10.1081340 0.0000011
UniRef50_W1XIL2 -10.1105605 0.0049409
UniRef90_A0A059RKT2 -10.1461038 0.0098084
UniRef50_A0A0D9S931 -10.1515885 0.0000001
UniRef90_E0PNM2 -10.2043464 0.0000000
UniRef90_B1IBW4 -10.2046745 0.0000044
UniRef90_Q9N083 -10.2786860 0.0070558
UniRef90_W1VR94 -10.2825608 0.0000010
UniRef90_F0FH66 -10.2895807 0.0000012
UniRef90_A0A287B4J8 -10.3103464 0.0000000
UniRef90_A0A0D9S5U8 -10.3110844 0.0000005
UniRef90_Q8WZ39 -10.3357997 0.0000000
UniRef90_A0A287AUW5 -10.3480753 0.0000000
UniRef90_L8EAJ4 -10.4081280 0.0000004
UniRef90_F9Q2A0 -10.4147292 0.0050472
UniRef90_U7V0Y2 -10.5197579 0.0095404
UniRef90_UPI0005F4E7EF -10.5496471 0.0000000
UniRef90_A0JZ85 -10.7101199 0.0000009
UniRef90_G0PP10 -10.7435601 0.0089581
UniRef90_E8JSH6 -10.9159656 0.0000005
UniRef90_I1ZM31 -10.9648887 0.0061977
UniRef90_A0A0F3HB42 -10.9839635 0.0000139
UniRef90_F0IIW6 -11.1665854 0.0000000
UniRef90_Q9H7M3 -11.3754229 0.0000000
UniRef50_UPI00045E37E4: PREDICTED: protein GVQW1-like, partial -11.4294979 0.0000000
UniRef90_A0A0P6A737 -11.4848805 0.0000724
UniRef90_B2YI96 -11.6707221 0.0032838
UniRef90_A3CNB2 -11.7980256 0.0097986
UniRef90_W1XIF5 -12.1070340 0.0075653
UniRef50_W1XIF5 -12.1070340 0.0075653
UniRef90_A0A1X1G532 -12.3770215 0.0056662
UniRef90_I7G8A4 -12.4250648 0.0074428
UniRef90_P67530 -12.8829662 0.0084962
UniRef90_A0A139QR72 -13.1785192 0.0049855
UniRef90_A5MC70 -14.3987371 0.0000012

Blood vs saliva_euk

Negative fold changes are higher in blood samples while positive are higher in saliva_euk samples.

blbe3 = genes.loc[:, ['Blood_Saliva_euk_FC', 'Blood_Saliva_euk_p']]
blbe3 = blbe3[blbe3['Blood_Saliva_euk_p'] < 0.01]
blbe3 = blbe3.sort_values(by=['Blood_Saliva_euk_FC'], ascending=False).rename(columns={'Blood_Saliva_euk_FC':'Log2 fold change', 'Blood_Saliva_euk_p':'p'})
blbe3 = pd.concat([blbe3.head(n=50), blbe3.tail(n=50)])
blbe3 = blbe3.groupby(by=blbe3.index, axis=0).mean()
blbe3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$blbe3 %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
UniRef90_Q5LXM5 15.340131 0.0000004
UniRef90_H2P484 14.073019 0.0000000
UniRef90_A0A2I1TTC0 13.898148 0.0000002
UniRef50_A0A146PTM6 13.108306 0.0000006
UniRef50_Q9H7M3: FLJ00047 protein (Fragment) 13.068202 0.0000000
UniRef90_A0A2K6THT0 13.056995 0.0000000
UniRef90_Q96NR6 12.873614 0.0000000
UniRef90_H2P861 12.697271 0.0094192
UniRef90_D3IDX3 12.583876 0.0000000
UniRef90_J7TUV6 12.523060 0.0000054
UniRef90_E4LCZ2 12.367192 0.0000048
UniRef50_Q9NRJ1: Protein MOST-1 12.358124 0.0000000
UniRef90_H2PFT0 12.204896 0.0000000
UniRef50_A0A174IYG9 12.163148 0.0000000
UniRef90_A0A174IYG9 12.072377 0.0000000
UniRef90_A7AJP8 12.042644 0.0000116
UniRef90_I7GKC4 12.036674 0.0000000
UniRef90_A5ZLI5 12.032373 0.0000000
UniRef90_D9RRC6 12.015198 0.0001089
UniRef90_A0A2J8UEK6 11.960983 0.0000000
UniRef90_D6KM31 11.925676 0.0000002
UniRef90_W1V953 11.870992 0.0000001
UniRef90_G5SMU9 11.813347 0.0000027
UniRef50_V8CH43 11.796140 0.0000000
UniRef90_A0A134C920 11.745783 0.0000131
UniRef90_D5EYY8 11.715933 0.0000000
UniRef90_A0A096C1X8 11.679717 0.0000002
UniRef90_A0A2D3LM49 11.661013 0.0000001
UniRef90_C9MQ90 11.624374 0.0000020
UniRef50_F8WAV8: Interleukin-20 receptor subunit beta 11.542661 0.0096313
UniRef90_A0A1B1IC24 11.528934 0.0000000
UniRef90_H2NVN9 11.515563 0.0000000
UniRef90_A7UXY6 11.501784 0.0000510
UniRef50_W1W9S2 11.494030 0.0000000
UniRef50_B4DHF1: cDNA FLJ61749 11.482711 0.0000000
UniRef90_X1H491 11.475303 0.0000000
UniRef90_Q8A499 11.471490 0.0000002
UniRef50_A0A1C5TMP4 11.457822 0.0000003
UniRef50_A5ZLI4 11.394738 0.0000150
UniRef50_W1WLZ3 11.389933 0.0000005
UniRef90_W1WLZ3 11.379954 0.0000005
UniRef90_A7B8Q2 11.307113 0.0000353
UniRef90_U2Z4S8 11.302840 0.0000896
UniRef90_D3IN37 11.291941 0.0000001
UniRef90_E4LBK5 11.270987 0.0000130
UniRef50_A0A2K0XJM9 11.260841 0.0000022
UniRef90_J7TNS4 11.257366 0.0000327
UniRef90_U2J131 11.235496 0.0000000
UniRef90_Q9NRJ1 11.232382 0.0000000
UniRef90_G1VAR6 11.231031 0.0000015
UniRef50_UPI000B3F59CB -1.190865 0.0056979
UniRef50_W5P5C7 -1.205205 0.0067865
UniRef90_UPI000732AD10 -1.212379 0.0075341
UniRef90_UPI000847C819 -1.231627 0.0075318
UniRef90_Q6ZRZ7 -1.236436 0.0089660
UniRef50_A0A1A9ASM8 -1.241270 0.0014614
UniRef90_A0A1A9ASM8 -1.241270 0.0014614
UniRef90_P30050 -1.265497 0.0052068
UniRef90_A0A2U2W3H2 -1.283754 0.0023887
UniRef90_H2N8Q4 -1.331225 0.0064317
UniRef50_G1QSA7 -1.364410 0.0057139
UniRef50_P08865: 40S ribosomal protein SA -1.373165 0.0005701
UniRef50_G3SIN0 -1.401976 0.0048580
UniRef50_UPI000EF294EE -1.416099 0.0024257
UniRef90_UPI000EF294EE -1.420442 0.0018108
UniRef50_A0A1A9ANW3 -1.447717 0.0006089
UniRef90_F7HQ61 -1.489799 0.0076729
UniRef90_A0A2K5HHY8 -1.493760 0.0015008
UniRef50_F8VYN5: Heterogeneous nuclear ribonucleoprotein A1 (Fragment) -1.517292 0.0016908
UniRef50_UPI0008F51C75 -1.528115 0.0062973
UniRef90_A0A1A9ANW3 -1.528735 0.0002737
UniRef90_UPI000D0A1051 -1.533556 0.0069270
UniRef90_Q9Z154 -1.557043 0.0011519
UniRef90_G7P1P7 -1.567828 0.0035882
UniRef50_A0A2U2VZN6 -1.582218 0.0030915
UniRef90_G7MP19 -1.585940 0.0006413
UniRef90_X1UIZ8 -1.638382 0.0007095
UniRef90_D6RAT0 -1.684800 0.0063811
UniRef90_A0A2U2VZN6 -1.704860 0.0029642
UniRef90_UPI000C313812 -1.740306 0.0041819
UniRef50_P03923: NADH-ubiquinone oxidoreductase chain 6 -1.763740 0.0071359
UniRef90_UPI0009A8E1A0 -1.822238 0.0065870
UniRef90_P61247 -1.861389 0.0024973
UniRef50_P03905: NADH-ubiquinone oxidoreductase chain 4 -1.891614 0.0069146
UniRef90_P84040 -2.017721 0.0076135
UniRef90_J9NRV0 -2.017721 0.0076135
UniRef90_A0A2M4CW35 -2.018112 0.0076281
UniRef90_R4G555 -2.114032 0.0076415
UniRef90_F7A9Z7 -2.117455 0.0023078
UniRef90_P03923 -2.134030 0.0074298
UniRef90_S9X3V4 -2.265461 0.0057090
UniRef50_H2PTC1 -2.328117 0.0060452
UniRef90_X1FPE0 -2.522721 0.0098859
UniRef90_A0A343KLL1 -2.580423 0.0039726
UniRef90_H2PTC1 -2.605292 0.0060061
UniRef90_B4YG13 -2.692092 0.0043643
UniRef90_A0A343FX78 -2.700931 0.0030189
UniRef90_Q99878 -3.176581 0.0044857
UniRef90_F6TZJ6 -3.647140 0.0060113
UniRef90_A0A059UGD1 -6.564523 0.0053080

Blood vs buccal_euk

Negative fold changes are higher in blood samples while positive are higher in buccal_euk samples.

blse3 = genes.loc[:, ['Blood_Buccal_euk_FC', 'Blood_Buccal_euk_p']]
blse3 = blse3[blse3['Blood_Buccal_euk_p'] < 0.01]
blse3 = blse3.sort_values(by=['Blood_Buccal_euk_FC'], ascending=False).rename(columns={'Blood_Buccal_euk_FC':'Log2 fold change', 'Blood_Buccal_euk_p':'p'})
blse3 = pd.concat([blse3.head(n=50), blse.tail(n=50)])
blse3 = blse3.groupby(by=blse3.index, axis=0).mean()
blse3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$blse3 %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
UniRef90_H2NL54 12.134172 0.0000001
UniRef90_I7GKC4 11.766724 0.0000025
UniRef90_A0A2B5ASZ2 11.486150 0.0000014
UniRef90_B3KRW9 11.399952 0.0000111
UniRef50_D3DPD0: HCG1652138, isoform CRA_a 11.389117 0.0000000
UniRef90_G7P4E9 11.289988 0.0006329
UniRef90_UPI0004F0B482 11.183212 0.0000001
UniRef50_UPI000CEFC57C 11.085708 0.0000004
UniRef90_UPI000CEFC57C 11.069592 0.0000013
UniRef90_G7NJR0 10.880698 0.0088362
UniRef90_Q04ML4 10.766447 0.0003120
UniRef90_A0A2K6KDZ4 10.696823 0.0069987
UniRef90_Q8DRE1 10.693464 0.0000025
UniRef90_A0A2I3MMY7 10.331505 0.0000001
UniRef90_H2NUU9 10.040391 0.0000016
UniRef50_Q6ZRI4: cDNA FLJ46339 fis, clone TESTI4046450 9.991525 0.0000027
UniRef90_Q6ZRI4 9.991525 0.0000027
UniRef90_F8WAV8 9.987112 0.0000004
UniRef90_H3BUS0 9.970033 0.0000073
UniRef50_Q9UI52: HCG1818783 9.892900 0.0000113
UniRef90_G7N124 9.818158 0.0000000
UniRef50_UPI000B7B7569 9.788402 0.0000023
UniRef90_A0A1D5R7R1 9.722046 0.0000000
UniRef50_W1W9S2 9.691685 0.0000205
UniRef90_A0A0F0CDV5 9.626875 0.0002394
UniRef50_A0A0F0CDV5 9.626875 0.0002504
UniRef50_H2PP79 9.549300 0.0000001
UniRef90_H2PP79 9.549300 0.0000001
UniRef50_Q6ZSN1 9.538742 0.0000425
UniRef50_M0R171: Zinc finger protein 468 9.437993 0.0000017
UniRef90_M0R171 9.437993 0.0000017
UniRef90_J7SHX6 9.308914 0.0001051
UniRef90_H2NBN2 9.264875 0.0000002
UniRef50_Q02878: 60S ribosomal protein L6 9.131735 0.0000003
UniRef90_UPI0005F40944 9.100331 0.0000002
UniRef50_UPI0005F40944 9.100331 0.0000002
UniRef90_A0A2I3LMT4 9.093173 0.0000927
UniRef90_A0A2I3RVA0 9.055319 0.0000000
UniRef90_A0A1A9AMQ3 9.050824 0.0000030
UniRef50_A0A1A9AMQ3 9.050824 0.0000030
UniRef50_P05141: ADP/ATP translocase 2 9.033386 0.0000077
UniRef90_Q6ZSN1 8.997338 0.0000172
UniRef90_Q6ZVI0 8.957761 0.0000002
UniRef50_Q6ZVI0: cDNA FLJ42559 fis, clone BRACE3006226 8.957761 0.0000002
UniRef50_B4E3R2: cDNA FLJ59085, highly similar to Polycystin-1 8.865063 0.0000008
UniRef90_A0A2I2ZDD7 8.860090 0.0000007
UniRef50_L8E7P6: Alternative protein FLG 8.851721 0.0000049
UniRef90_A0A062X488 8.844600 0.0000186
UniRef50_A0A062X488 8.844600 0.0000280
UniRef90_S4R460 8.773151 0.0000000
UniRef50_I7G8L2: Macaca fascicularis brain cDNA clone: QmoA-11833, similar to human chemokine (C-C motif) ligand 5 (CCL5), mRNA, RefSeq: NM_002985.2 -3.057083 0.0044197
UniRef50_UPI000273D3DB: PREDICTED: histone demethylase UTY-like -3.058105 0.0006568
UniRef50_X1RTY3: Marine sediment metagenome DNA, contig: S12H4_L05131 -3.088100 0.0001320
UniRef50_Q6ZNX4: CDNA FLJ26942 fis, clone RCT07464 -3.088863 0.0000248
UniRef50_Q9BZ60: FKSG63 -3.094164 0.0043170
UniRef50_H2PBI0 -3.194845 0.0050537
UniRef50_Q8NF05: FLJ00399 protein (Fragment) -3.195422 0.0017753
UniRef50_G3RYZ2 -3.202026 0.0017820
UniRef50_Q1W209: Embryonic stem cell-related gene protein -3.298909 0.0040440
UniRef50_UPI00045E37E4: PREDICTED: protein GVQW1-like, partial -3.521571 0.0021273
UniRef50_F6TJB2 -3.599640 0.0073896
UniRef50_W1YJZ2 -3.669211 0.0000867
UniRef50_F7GRN4 -3.714752 0.0009200
UniRef50_Q6ZUG4: cDNA FLJ43741 fis, clone TESTI2017727 -3.918750 0.0026472
UniRef50_Q6FG63: FLJ10385 protein -4.086130 0.0029433
UniRef50_W1WD51 -4.129838 0.0000402
UniRef50_O95662: KpnI repetitive sequence (T-betaG41) 3kb downstream of beta-globin protein (Fragment) -4.204094 0.0015056
UniRef50_Q8WY51: HC6 -4.225498 0.0064566
UniRef50_S8BUY4 -4.259124 0.0000409
UniRef50_F6S8D7 -4.301492 0.0058294
UniRef50_Q8N329: C3orf64 protein -4.310691 0.0051846
UniRef50_I7G4S6: Macaca fascicularis brain cDNA clone: QbsB-10410, similar to human hypothetical protein FLJ20457 (FLJ20457), mRNA, RefSeq: NM_017832.2 -4.419854 0.0067237
UniRef50_F7GW59 -4.437119 0.0000852
UniRef50_Q8N287: cDNA FLJ33669 fis, clone BRAMY2028740 -4.463438 0.0061385
UniRef50_W1V5A8 -4.529869 0.0000501
UniRef50_Q6ZR97: cDNA FLJ46534 fis, clone THYMU3037052, weakly similar to Homo sapiens HIV TAT specific factor 1 (HTATSF1) -4.612080 0.0042394
UniRef50_UPI000292A7A2 -4.755766 0.0050788
UniRef50_G2HH39: Zinc finger protein 580 -4.874037 0.0066219
UniRef50_F7HM41 -5.239724 0.0041429
UniRef50_G9L4Y9 -5.290411 0.0077388
UniRef50_F7ERF9 -5.464497 0.0084036
UniRef50_UPI0001AF4819: hypothetical protein -5.485818 0.0013148
UniRef50_I7G402: Macaca fascicularis brain cDNA clone: QbsA-12013, similar to human glutamate receptor, metabotropic 6 (GRM6), mRNA, RefSeq: NM_000843.2 -5.510659 0.0026030
UniRef50_G7PG11 -5.567385 0.0067932
UniRef50_Q96LU0: CDNA FLJ25069 fis, clone CBL05145 -5.625021 0.0047341
UniRef50_Q9UI61: PRO0470 -5.839999 0.0088171
UniRef50_Q9GMT0 -5.905245 0.0092684
UniRef50_Q6ZNZ6: CDNA FLJ26821 fis, clone PRS06629 -5.952807 0.0070262
UniRef50_M3Z9H5 -6.138953 0.0025770
UniRef50_F7CR38 -6.162863 0.0077810
UniRef50_X6R7Y7: Intraflagellar transport protein 25 homolog -6.496334 0.0059577
UniRef50_K7EJ15: Ankyrin repeat domain-containing protein 20A3 (Fragment) -6.986405 0.0002879
UniRef50_F6ZQ59 -7.076242 0.0070378
UniRef50_Q4XHH2 -7.100608 0.0066861
UniRef50_Q8N210: Retbindin -7.152420 0.0042904
UniRef50_H7BYS4 -7.256297 0.0030867
UniRef50_Q4Y9P0 -7.638074 0.0045271
UniRef50_Q8WTZ3: Zinc finger protein ENSP00000375192 -8.305599 0.0091100
UniRef50_S9WFI9 -9.303851 0.0000001
UniRef50_S4L0C2 -9.774147 0.0042497

HUMAnN most differentially abundant KEGG orthologs

For each of these, I am initially filtering out to only P values that are below 0.01, and then am taking only the most up- or down-regulated KEGG orthologs and printing them to a table.

HUMAnN2

For HUMAnN2 I am now looking at the KEGG orthologs assigned to the Uniref50 database, as above for the gene families.

kos = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann/processing/genes_50_ko.csv', header=0, index_col=0)
kegg_list = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kegg_list.csv', header=0, index_col=0)
kegg_list.drop_duplicates(inplace=True)
kegg_dict = {}
kl = list(kegg_list.index.values)
for ko in kl:
  kegg_dict[ko] = ko+' '+kegg_list.loc[ko, 'Product']
kos = kos.rename(index=kegg_dict)

Blood vs saliva

Negative fold changes are higher in saliva samples while positive are higher in blood samples.

bs = kos.loc[:, ['Saliva_Blood_FC', 'Saliva_Blood_p']]
bs = bs[bs['Saliva_Blood_p'] < 0.01]
bs = bs.sort_values(by=['Saliva_Blood_FC'], ascending=False).rename(columns={'Saliva_Blood_FC':'Log2 fold change', 'Saliva_Blood_p':'p'})
bs = pd.concat([bs.head(n=50), bs.tail(n=50)])
bs.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bs %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
K04496 CTBP; C-terminal binding protein 1.9334599 0.0002204
K02866 RP-L10e, RPL10; large subunit ribosomal protein L10e 1.7897436 0.0028306
K02268 COX6C; cytochrome c oxidase subunit 6c 1.7510718 0.0067512
K02934 RP-L6e, RPL6; large subunit ribosomal protein L6e 1.4908265 0.0024136
K05863 SLC25A4S, ANT; solute carrier family 25 (mitochondrial adenine nucleotide translocator), member 4/5/6/31 1.3915885 0.0019331
K05414 IFNA; interferon alpha 1.1521229 0.0033957
K12741 HNRNPA1_3; heterogeneous nuclear ribonucleoprotein A1/A3 1.0346679 0.0038171
K19469 FTO; mRNA N6-methyladenine demethylase 1.0320411 0.0044718
K03884 ND6; NADH-ubiquinone oxidoreductase chain 6 -0.7506043 0.0069142
K02998 RP-SAe, RPSA; small subunit ribosomal protein SAe -1.0165759 0.0064599
K08961 K08961; chondroitin-sulfate-ABC endolyase/exolyase -1.0460080 0.0061217
K09228 KRAB; KRAB domain-containing zinc finger protein -1.1464222 0.0062389
K02870 RP-L12e, RPL12; large subunit ribosomal protein L12e -1.1857902 0.0036332
K12567 TTN; titin -1.3149551 0.0000317
K14217 IFIT1; interferon-induced protein with tetratricopeptide repeats 1 -1.3358230 0.0021426
K00412 CYTB, petB; ubiquinol-cytochrome c reductase cytochrome b subunit -1.3438118 0.0008233
K09671 CHST6; carbohydrate 6-sulfotransferase 6 -1.6336278 0.0018200
K16741 ALMS1; alstrom syndrome protein 1 -1.7043058 0.0010936
K04746 CHST4; carbohydrate 6-sulfotransferase 4 -1.7395004 0.0003520
K15613 MEIS1; homeobox protein Meis1 -1.7553549 0.0099416
K06122 dhbE; glycerol dehydratase small subunit -1.8117034 0.0010157
K11480 AURKC; aurora kinase C -1.9882583 0.0009205
K03096 FRAT2; frequently rearranged in advanced T-cell lymphomas 2 -2.0489115 0.0012559
K08026 ONECUT1, HNF6; one cut domain, family member 1, hepatocyte nuclear factor 6 -2.0718133 0.0019616
K10054 PML, TRIM19; probable transcription factor PML -2.1362750 0.0016119
K16496 PCDHGB; protocadherin gamma subfamily B -2.1441586 0.0000246
K16670 MEIS2; homeobox protein Meis2 -2.1545379 0.0002416
K04011 CR1, CD35; complement component (3b/4b) receptor 1 -2.1798510 0.0047209
K09370 ISL1; insulin gene enhancer protein ISL-1 -2.2417383 0.0040429
K03006 RPB1, POLR2A; DNA-directed RNA polymerase II subunit RPB1 -2.2887214 0.0023002
K12864 CTNNBL1; beta-catenin-like protein 1 -2.2988227 0.0098203
K03065 PSMC3, RPT5; 26S proteasome regulatory subunit T5 -2.2994769 0.0014369
K08610 ADAM21; disintegrin and metalloproteinase domain-containing protein 21 -2.3529345 0.0000961
K02329 FZD3; frizzled 3 -2.3883758 0.0034076
K17778 TIM10; mitochondrial import inner membrane translocase subunit TIM10 -2.4172144 0.0085489
K07606 VIM; vimentin -2.4243389 0.0047630
K10455 KLHL18; kelch-like protein 18 -2.4673402 0.0071273
K09387 FOXA1, HNF3A; forkhead box protein A1, hepatocyte nuclear factor 3-alpha -2.4945583 0.0023481
K16586 HAUS3; HAUS augmin-like complex subunit 3 -2.4967870 0.0010657
K04914 KCNK3, K2P3.1; potassium channel subfamily K member 3 -2.5433417 0.0012574
K12477 EHD4; EH domain-containing protein 4 -2.5433417 0.0008572
K12653 NLRX1; NLR family member X1 -2.5547055 0.0065685
K08103 HS6ST3; heparan sulfate 6-O-sulfotransferase HS6ST3 -2.5828174 0.0002684
K11182 AOC1, ABP1; diamine oxidase -2.5835284 0.0002678
K18727 TNPO2, IPO3, KPNB2B; transportin-2 -2.5994545 0.0000150
K13981 AKR1E2, AKR1CL2; 1,5-anhydro-D-fructose reductase -2.6269843 0.0001921
K08527 RARA, NR1B1; retinoic acid receptor alpha -2.6342905 0.0020800
K04813 CHRNB2; nicotinic acetylcholine receptor beta-2 -2.6342905 0.0000155
K17709 CYP2B6; cytochrome P450 family 2 subfamily B polypeptide 6 -2.6435049 0.0038909
K08528 RARB, NR1B2; retinoic acid receptor beta -2.6542668 0.0000447
K02867 RP-L11, MRPL11, rplK; large subunit ribosomal protein L11 -14.2369610 0.0000000
K02968 RP-S20, rpsT; small subunit ribosomal protein S20 -14.2403147 0.0000000
K02871 RP-L13, MRPL13, rplM; large subunit ribosomal protein L13 -14.2509766 0.0000000
K02895 RP-L24, MRPL24, rplX; large subunit ribosomal protein L24 -14.2587624 0.0000000
K02015 ABC.FEV.P; iron complex transport system permease protein -14.2605983 0.0000000
K02888 RP-L21, MRPL21, rplU; large subunit ribosomal protein L21 -14.2628930 0.0000000
K02874 RP-L14, MRPL14, rplN; large subunit ribosomal protein L14 -14.2631149 0.0000000
K02890 RP-L22, MRPL22, rplV; large subunit ribosomal protein L22 -14.2878814 0.0000000
K02935 RP-L7, MRPL12, rplL; large subunit ribosomal protein L7/L12 -14.2943955 0.0000000
K02959 RP-S16, MRPS16, rpsP; small subunit ribosomal protein S16 -14.2999624 0.0000000
K02994 RP-S8, rpsH; small subunit ribosomal protein S8 -14.3026847 0.0000000
K03496 parA, soj; chromosome partitioning protein -14.3028272 0.0000000
K02110 ATPF0C, atpE; F-type H+-transporting ATPase subunit c -14.3061261 0.0000000
K02879 RP-L17, MRPL17, rplQ; large subunit ribosomal protein L17 -14.3396335 0.0013182
K02990 RP-S6, MRPS6, rpsF; small subunit ribosomal protein S6 -14.3455530 0.0000000
K02884 RP-L19, MRPL19, rplS; large subunit ribosomal protein L19 -14.3564629 0.0000000
K02948 RP-S11, MRPS11, rpsK; small subunit ribosomal protein S11 -14.3618646 0.0000000
K02876 RP-L15, MRPL15, rplO; large subunit ribosomal protein L15 -14.3672130 0.0000000
K02996 RP-S9, MRPS9, rpsI; small subunit ribosomal protein S9 -14.3718180 0.0000000
K06147 ABCB-BAC; ATP-binding cassette, subfamily B, bacterial -14.3725317 0.0000000
K07496 K07496; putative transposase -14.3817416 0.0000090
K02013 ABC.FEV.A; iron complex transport system ATP-binding protein -14.3830435 0.0000000
K02887 RP-L20, MRPL20, rplT; large subunit ribosomal protein L20 -14.3892942 0.0000000
K02952 RP-S13, rpsM; small subunit ribosomal protein S13 -14.3917295 0.0000000
K02881 RP-L18, MRPL18, rplR; large subunit ribosomal protein L18 -14.4148047 0.0000000
K02904 RP-L29, rpmC; large subunit ribosomal protein L29 -14.4183841 0.0000000
K02965 RP-S19, rpsS; small subunit ribosomal protein S19 -14.4973970 0.0000000
K02954 RP-S14, MRPS14, rpsN; small subunit ribosomal protein S14 -14.5614245 0.0000000
K02950 RP-S12, MRPS12, rpsL; small subunit ribosomal protein S12 -14.5673952 0.0000000
K02899 RP-L27, MRPL27, rpmA; large subunit ribosomal protein L27 -14.5759170 0.0000000
K02003 ABC.CD.A; putative ABC transport system ATP-binding protein -14.5781923 0.0000000
K02909 RP-L31, rpmE; large subunit ribosomal protein L31 -14.6157375 0.0000000
K02946 RP-S10, MRPS10, rpsJ; small subunit ribosomal protein S10 -14.6386369 0.0000000
K07024 SPP; sucrose-6-phosphatase -14.6485644 0.0000000
K02963 RP-S18, MRPS18, rpsR; small subunit ribosomal protein S18 -14.6563031 0.0013393
K02961 RP-S17, MRPS17, rpsQ; small subunit ribosomal protein S17 -14.7600819 0.0004679
K02956 RP-S15, MRPS15, rpsO; small subunit ribosomal protein S15 -14.7802134 0.0000000
K03530 hupB; DNA-binding protein HU-beta -14.7807616 0.0000000
K01990 ABC-2.A; ABC-2 type transport system ATP-binding protein -14.8026705 0.0000000
K01992 ABC-2.P; ABC-2 type transport system permease protein -14.8843035 0.0000000
K03073 secE; preprotein translocase subunit SecE -14.8931547 0.0000000
K02916 RP-L35, MRPL35, rpmI; large subunit ribosomal protein L35 -14.9053602 0.0000000
K02902 RP-L28, MRPL28, rpmB; large subunit ribosomal protein L28 -14.9478983 0.0000000
K02970 RP-S21, MRPS21, rpsU; small subunit ribosomal protein S21 -14.9753640 0.0000000
K02078 acpP; acyl carrier protein -14.9758915 0.0000000
K02518 infA; translation initiation factor IF-1 -14.9954649 0.0000000
K02907 RP-L30, MRPL30, rpmD; large subunit ribosomal protein L30 -15.1068973 0.0000000
K02911 RP-L32, MRPL32, rpmF; large subunit ribosomal protein L32 -15.2205278 0.0000000
K02914 RP-L34, MRPL34, rpmH; large subunit ribosomal protein L34 -15.9546015 0.0000000
K02913 RP-L33, MRPL33, rpmG; large subunit ribosomal protein L33 -17.7654733 0.0000000

Blood vs buccal

Negative fold changes are higher in buccal samples while positive are higher in blood samples.

bb = kos.loc[:, ['Buccal_Blood_FC', 'Buccal_Blood_p']]
bb = bb[bb['Buccal_Blood_p'] < 0.01]
bb= bb.sort_values(by=['Buccal_Blood_FC'], ascending=False).rename(columns={'Buccal_Blood_FC':'Log2 fold change', 'Buccal_Blood_p':'p'})
bb = pd.concat([bb.head(n=50), bb.tail(n=50)])
bb.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bb %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
K02866 RP-L10e, RPL10; large subunit ribosomal protein L10e 1.499031 0.0055909
K04496 CTBP; C-terminal binding protein 1.270333 0.0013989
K03878 ND1; NADH-ubiquinone oxidoreductase chain 1 -1.205975 0.0015199
K00522 FTH1; ferritin heavy chain -1.213633 0.0083241
K02934 RP-L6e, RPL6; large subunit ribosomal protein L6e -1.308519 0.0078647
K05863 SLC25A4S, ANT; solute carrier family 25 (mitochondrial adenine nucleotide translocator), member 4/5/6/31 -1.367098 0.0044002
K03881 ND4; NADH-ubiquinone oxidoreductase chain 4 -1.474114 0.0013476
K03883 ND5; NADH-ubiquinone oxidoreductase chain 5 -1.599817 0.0003537
K02261 COX2; cytochrome c oxidase subunit 2 -1.625205 0.0000490
K02262 COX3; cytochrome c oxidase subunit 3 -1.631428 0.0000390
K00412 CYTB, petB; ubiquinol-cytochrome c reductase cytochrome b subunit -1.657063 0.0000135
K02256 COX1; cytochrome c oxidase subunit 1 -1.714630 0.0000465
K03879 ND2; NADH-ubiquinone oxidoreductase chain 2 -1.784757 0.0005730
K03882 ND4L; NADH-ubiquinone oxidoreductase chain 4L -1.831291 0.0003196
K02126 ATPeF0A, MTATP6, ATP6; F-type H+-transporting ATPase subunit a -2.060504 0.0026289
K03884 ND6; NADH-ubiquinone oxidoreductase chain 6 -2.431830 0.0000092
K03767 PPIA; peptidyl-prolyl cis-trans isomerase A (cyclophilin A) -2.792954 0.0059237
K16496 PCDHGB; protocadherin gamma subfamily B -2.898683 0.0000035
K16495 PCDHGA; protocadherin gamma subfamily A -3.011391 0.0002148
K08610 ADAM21; disintegrin and metalloproteinase domain-containing protein 21 -3.340978 0.0000233
K16741 ALMS1; alstrom syndrome protein 1 -3.356812 0.0001533
K01940 argG, ASS1; argininosuccinate synthase -3.527865 0.0010200
K12406 PKLR; pyruvate kinase isozymes R/L -3.572806 0.0000174
K04290 S1PR3, EDG3; sphingosine 1-phosphate receptor 3 -3.577329 0.0000749
K07625 GJB6, CX30; gap junction beta-6 protein -3.712632 0.0016658
K18727 TNPO2, IPO3, KPNB2B; transportin-2 -3.908811 0.0000002
K00134 GAPDH, gapA; glyceraldehyde 3-phosphate dehydrogenase -3.959823 0.0003065
K01753 dsdA; D-serine dehydratase -3.974443 0.0000856
K12437 pks13; polyketide synthase 13 -4.015661 0.0011796
K16145 MUC16, CA125; mucin-16 -4.032076 0.0000001
K04876 KCNA3, KV1.3; potassium voltage-gated channel Shaker-related subfamily A member 3 -4.125433 0.0000001
K16090 fiu; catecholate siderophore receptor -4.160175 0.0000637
K16653 dprE1; decaprenylphospho-beta-D-ribofuranose 2-oxidase -4.174180 0.0000044
K01667 tnaA; tryptophanase -4.193504 0.0000065
K04874 KCNA1, KV1.1; potassium voltage-gated channel Shaker-related subfamily A member 1 -4.261196 0.0000131
K13183 DDX50; ATP-dependent RNA helicase DDX50 -4.292635 0.0000056
K17392 IGF2BP2; insulin-like growth factor 2 mRNA-binding protein 2 -4.350986 0.0000000
K04879 KCNA6, KV1.6; potassium voltage-gated channel Shaker-related subfamily A member 6 -4.368162 0.0000187
K09393 TFDP3; transcription factor Dp-3 -4.391917 0.0000011
K16351 LRRC4, NGL2; netrin-G2 ligand -4.459247 0.0000003
K13922 pduP; propionaldehyde dehydrogenase -4.464456 0.0000095
K05988 dexA; dextranase -4.466025 0.0007942
K05757 ARPC1A_B; actin related protein 43892 complex, subunit 1A/1B -4.492685 0.0000001
K17391 IGF2BP1; insulin-like growth factor 2 mRNA-binding protein 1 -4.543289 0.0000008
K01805 xylA; xylose isomerase -4.543446 0.0003544
K15043 KPNA2_7; importin subunit alpha-1/8 -4.551707 0.0000001
K17582 MKI67; antigen KI-67 -4.560015 0.0000000
K19339 nosR; NosR/NirI family transcriptional regulator, nitrous oxide reductase regulator -4.570831 0.0001472
K11993 ACOT6; acyl-CoA thioesterase 6 -4.572809 0.0000002
K04901 KCNG2, KV6.2; potassium voltage-gated channel subfamily G member 2 -4.579436 0.0000690
K02952 RP-S13, rpsM; small subunit ribosomal protein S13 -12.823884 0.0000020
K02968 RP-S20, rpsT; small subunit ribosomal protein S20 -12.840400 0.0000031
K02036 pstB; phosphate transport system ATP-binding protein -12.867276 0.0001099
K03217 yidC, spoIIIJ, OXA1, ccfA; YidC/Oxa1 family membrane protein insertase -12.880814 0.0000060
K02834 rbfA; ribosome-binding factor A -12.888706 0.0000052
K12295 comE; two-component system, LytTR family, response regulator ComE -12.903241 0.0001530
K02003 ABC.CD.A; putative ABC transport system ATP-binding protein -12.923195 0.0000004
K02895 RP-L24, MRPL24, rplX; large subunit ribosomal protein L24 -12.931376 0.0000003
K16509 spxA; regulatory protein spx -12.946255 0.0000188
K02035 ABC.PE.S; peptide/nickel transport system substrate-binding protein -12.951988 0.0000037
K02994 RP-S8, rpsH; small subunit ribosomal protein S8 -12.958845 0.0000050
K02013 ABC.FEV.A; iron complex transport system ATP-binding protein -12.970199 0.0000005
K02963 RP-S18, MRPS18, rpsR; small subunit ribosomal protein S18 -12.970403 0.0034532
K02950 RP-S12, MRPS12, rpsL; small subunit ribosomal protein S12 -12.978923 0.0000004
K02440 GLPF; glycerol uptake facilitator protein -12.985260 0.0000018
K02956 RP-S15, MRPS15, rpsO; small subunit ribosomal protein S15 -13.012473 0.0000001
K03073 secE; preprotein translocase subunit SecE -13.012813 0.0001316
K02892 RP-L23, MRPL23, rplW; large subunit ribosomal protein L23 -13.041780 0.0000012
K02961 RP-S17, MRPS17, rpsQ; small subunit ribosomal protein S17 -13.062567 0.0013966
K03402 argR, ahrC; transcriptional regulator of arginine metabolism -13.063615 0.0000080
K02965 RP-S19, rpsS; small subunit ribosomal protein S19 -13.092549 0.0000024
K08998 K08998; uncharacterized protein -13.100846 0.0000116
K02946 RP-S10, MRPS10, rpsJ; small subunit ribosomal protein S10 -13.106369 0.0000006
K02529 lacI, galR; LacI family transcriptional regulator -13.145471 0.0000210
K03499 trkA, ktrA; trk system potassium uptake protein -13.160580 0.0000018
K02110 ATPF0C, atpE; F-type H+-transporting ATPase subunit c -13.177140 0.0000005
K02904 RP-L29, rpmC; large subunit ribosomal protein L29 -13.188205 0.0000000
K02899 RP-L27, MRPL27, rpmA; large subunit ribosomal protein L27 -13.190505 0.0000043
K02794 PTS-Man-EIIB, manX; PTS system, mannose-specific IIB component -13.206069 0.0000440
K02793 PTS-Man-EIIA, manX; PTS system, mannose-specific IIA component -13.215231 0.0000194
K02004 ABC.CD.P; putative ABC transport system permease protein -13.275588 0.0000027
K02015 ABC.FEV.P; iron complex transport system permease protein -13.354928 0.0000008
K02078 acpP; acyl carrier protein -13.454275 0.0000016
K03111 ssb; single-strand DNA-binding protein -13.454335 0.0000044
K02970 RP-S21, MRPS21, rpsU; small subunit ribosomal protein S21 -13.486267 0.0000005
K07052 K07052; uncharacterized protein -13.508456 0.0000243
K03574 mutT, NUDT15, MTH2; 8-oxo-dGTP diphosphatase -13.527932 0.0000043
K02954 RP-S14, MRPS14, rpsN; small subunit ribosomal protein S14 -13.558267 0.0000014
K02518 infA; translation initiation factor IF-1 -13.721290 0.0000007
K01756 purB, ADSL; adenylosuccinate lyase -13.777987 0.0000075
K06147 ABCB-BAC; ATP-binding cassette, subfamily B, bacterial -13.779813 0.0000017
K07024 SPP; sucrose-6-phosphatase -13.822793 0.0000022
K02902 RP-L28, MRPL28, rpmB; large subunit ribosomal protein L28 -13.925475 0.0000017
K02030 ABC.PA.S; polar amino acid transport system substrate-binding protein -14.009529 0.0000147
K07729 K07729; putative transcriptional regulator -14.027926 0.0000088
K02907 RP-L30, MRPL30, rpmD; large subunit ribosomal protein L30 -14.061021 0.0000020
K02911 RP-L32, MRPL32, rpmF; large subunit ribosomal protein L32 -14.062830 0.0000173
K01992 ABC-2.P; ABC-2 type transport system permease protein -14.306074 0.0000008
K01990 ABC-2.A; ABC-2 type transport system ATP-binding protein -14.930515 0.0000006
K02913 RP-L33, MRPL33, rpmG; large subunit ribosomal protein L33 -17.574972 0.0000496

Saliva vs buccal

Negative fold changes are higher in saliva samples while positive are higher in buccal samples.

sb = kos.loc[:, ['Saliva_Buccal_FC', 'Saliva_Buccal_p']]
sb = sb[sb['Saliva_Buccal_p'] < 0.01]
sb= sb.sort_values(by=['Saliva_Buccal_FC'], ascending=False).rename(columns={'Saliva_Buccal_FC':'Log2 fold change', 'Saliva_Buccal_p':'p'})
sb = pd.concat([sb.head(n=50), sb.tail(n=50)])
sb.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$sb %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
K00689 E2.4.1.5; dextransucrase 3.091796 0.0019816
K03878 ND1; NADH-ubiquinone oxidoreductase chain 1 1.964259 0.0000558
K03883 ND5; NADH-ubiquinone oxidoreductase chain 5 1.884454 0.0000277
K02126 ATPeF0A, MTATP6, ATP6; F-type H+-transporting ATPase subunit a 1.875469 0.0000097
K03881 ND4; NADH-ubiquinone oxidoreductase chain 4 1.849453 0.0000343
K03879 ND2; NADH-ubiquinone oxidoreductase chain 2 1.759213 0.0000154
K03098 APOD; apolipoprotein D and lipocalin family protein 1.757776 0.0060616
K02256 COX1; cytochrome c oxidase subunit 1 1.705164 0.0000261
K17769 TOM22; mitochondrial import receptor subunit TOM22 1.703909 0.0082306
K03884 ND6; NADH-ubiquinone oxidoreductase chain 6 1.681226 0.0000629
K02262 COX3; cytochrome c oxidase subunit 3 1.593552 0.0000505
K10583 UBE2S, E2EPF; ubiquitin-conjugating enzyme E2 S 1.572378 0.0040913
K02261 COX2; cytochrome c oxidase subunit 2 1.554875 0.0000517
K03880 ND3; NADH-ubiquinone oxidoreductase chain 3 1.550412 0.0005111
K02125 ATPeF08, MTATP8, ATP8; F-type H+-transporting ATPase subunit 8 1.548533 0.0001840
K05757 ARPC1A_B; actin related protein 43892 complex, subunit 1A/1B 1.379567 0.0069916
K02941 RP-LP0, RPLP0; large subunit ribosomal protein LP0 1.328357 0.0083823
K18727 TNPO2, IPO3, KPNB2B; transportin-2 1.309357 0.0021068
K17565 PPP1R26; protein phosphatase 1 regulatory subunit 26 1.261023 0.0090406
K02901 RP-L27e, RPL27; large subunit ribosomal protein L27e 1.234855 0.0029414
K18752 TNPO1, IPO2, KPNB2; transportin-1 1.110764 0.0085421
K07766 E3.6.1.52; diphosphoinositol-polyphosphate diphosphatase 1.096184 0.0037824
K11247 SH3GL; endophilin-A 1.045827 0.0009235
K14455 GOT2; aspartate aminotransferase, mitochondrial 1.026448 0.0025992
K11993 ACOT6; acyl-CoA thioesterase 6 -1.022016 0.0072266
K16362 FLRT; leucine-rich repeat transmembrane protein FLRT -1.032869 0.0036952
K08961 K08961; chondroitin-sulfate-ABC endolyase/exolyase -1.046008 0.0061217
K02898 RP-L26e, RPL26; large subunit ribosomal protein L26e -1.084513 0.0088971
K04985 PKD1; polycystin 1 -1.094280 0.0040461
K02980 RP-S29e, RPS29; small subunit ribosomal protein S29e -1.111203 0.0058887
K15040 VDAC2; voltage-dependent anion channel protein 2 -1.122858 0.0056760
K03249 EIF3F; translation initiation factor 3 subunit F -1.130201 0.0019101
K02432 FZD1_7, fz; frizzled 44013 -1.178607 0.0075719
K07980 KIR3DL, CD158; killer cell immunoglobulin-like receptor 3DL -1.211362 0.0058397
K02872 RP-L13Ae, RPL13A; large subunit ribosomal protein L13Ae -1.355498 0.0028612
K02951 RP-S12e, RPS12; small subunit ribosomal protein S12e -1.480073 0.0097971
K02947 RP-S10e, RPS10; small subunit ribosomal protein S10e -1.512282 0.0066140
K02768 PTS-Fru-EIIA, fruB; PTS system, fructose-specific IIA component -1.579825 0.0051358
K05995 pepE; dipeptidase E -1.601694 0.0030411
K00163 aceE; pyruvate dehydrogenase E1 component -1.671457 0.0040851
K00368 nirK; nitrite reductase (NO-forming) -1.695229 0.0048353
K02770 PTS-Fru-EIIC, fruA; PTS system, fructose-specific IIC component -1.727637 0.0041018
K05589 ftsB; cell division protein FtsB -1.735533 0.0059633
K07670 mtrA; two-component system, OmpR family, response regulator MtrA -1.762458 0.0023952
K02769 PTS-Fru-EIIB, fruA; PTS system, fructose-specific IIB component -1.772604 0.0034711
K00611 OTC, argF, argI; ornithine carbamoyltransferase -1.775059 0.0038423
K03801 lipB; lipoyl(octanoyl) transferase -1.783586 0.0045839
K00058 serA, PHGDH; D-3-phosphoglycerate dehydrogenase / 2-oxoglutarate reductase -1.787206 0.0037298
K01207 nagZ; beta-N-acetylhexosaminidase -1.801148 0.0042518
K06122 dhbE; glycerol dehydratase small subunit -1.811703 0.0010157
K02421 fliR; flagellar biosynthetic protein FliR -8.657679 0.0000195
K04065 osmY; hyperosmotically inducible periplasmic protein -8.666040 0.0000078
K07590 RP-L7A, rplGB; large subunit ribosomal protein L7A -8.682366 0.0000007
K06603 flaG; flagellar protein FlaG -8.684221 0.0000005
K00413 CYC1, CYT1, petC; ubiquinol-cytochrome c reductase cytochrome c1 subunit -8.691016 0.0000026
K03090 sigB; RNA polymerase sigma-B factor -8.702081 0.0037978
K00662 aacC; aminoglycoside 3-N-acetyltransferase -8.745173 0.0000019
K15635 apgM; 2,3-bisphosphoglycerate-independent phosphoglycerate mutase -8.749110 0.0006290
K02389 flgD; flagellar basal-body rod modification protein FlgD -8.751046 0.0042398
K08984 yjdF; putative membrane protein -8.792184 0.0019274
K05298 GAPA; glyceraldehyde-3-phosphate dehydrogenase (NADP+) (phosphorylating) -8.805830 0.0000300
K04070 pflX; putative pyruvate formate lyase activating enzyme -8.815048 0.0080126
K16293 psrB; polysulfide reductase chain B -8.819294 0.0000001
K00282 gcvPA; glycine dehydrogenase subunit 1 -8.826747 0.0064730
K02390 flgE; flagellar hook protein FlgE -8.867032 0.0001173
K04835 mal; methylaspartate ammonia-lyase -8.888296 0.0000142
K13034 ATCYSC1; L-3-cyanoalanine synthase/ cysteine synthase -8.910907 0.0007810
K13652 K13652; AraC family transcriptional regulator -8.933437 0.0000003
K01173 ENDOG; endonuclease G, mitochondrial -8.966743 0.0039007
K02416 fliM; flagellar motor switch protein FliM -8.984770 0.0002853
K07080 K07080; uncharacterized protein -8.989049 0.0000105
K06992 K06992; uncharacterized protein -9.020148 0.0009517
K02499 yabN; tetrapyrrole methylase family protein / MazG family protein -9.031013 0.0076265
K09979 K09979; uncharacterized protein -9.092154 0.0000000
K18285 mqnE; aminodeoxyfutalosine synthase -9.118999 0.0007298
K02415 fliL; flagellar FliL protein -9.138959 0.0000291
K18828 mvpA, vapC; tRNA(fMet)-specific endonuclease VapC -9.140282 0.0000054
K01305 iadA; beta-aspartyl-dipeptidase (metallo-type) -9.179806 0.0058965
K01678 E4.2.1.2AB, fumB; fumarate hydratase subunit beta -9.207153 0.0020642
K02405 fliA; RNA polymerase sigma factor for flagellar operon FliA -9.216848 0.0020283
K02387 flgB; flagellar basal-body rod protein FlgB -9.239752 0.0009116
K03747 smg; Smg protein -9.261973 0.0025450
K11782 mqnA; chorismate dehydratase -9.271650 0.0021359
K16209 lacS, galP, rafP; lactose/raffinose/galactose permease -9.366334 0.0000001
K00176 korD, oorD; 2-oxoglutarate ferredoxin oxidoreductase subunit delta -9.372137 0.0002304
K06221 dkgA; 2,5-diketo-D-gluconate reductase A -9.381740 0.0066407
K02408 fliE; flagellar hook-basal body complex protein FliE -9.424117 0.0000042
K03412 cheB; two-component system, chemotaxis family, protein-glutamate methylesterase/glutaminase -9.443875 0.0013220
K00068 srlD; sorbitol-6-phosphate 2-dehydrogenase -9.445283 0.0083816
K02420 fliQ; flagellar biosynthetic protein FliQ -9.450876 0.0018003
K02526 kdgT; 2-keto-3-deoxygluconate permease -9.459134 0.0028565
K08641 vanX; zinc D-Ala-D-Ala dipeptidase -9.583023 0.0028897
K15034 yaeJ; ribosome-associated protein -9.597670 0.0005718
K05303 K05303; O-methyltransferase -9.630205 0.0000002
K08972 K08972; putative membrane protein -9.675798 0.0000003
K09121 larC; pyridinium-3,5-bisthiocarboxylic acid mononucleotide nickel chelatase -9.742747 0.0067663
K03413 cheY; two-component system, chemotaxis family, chemotaxis protein CheY -9.925642 0.0025733
K08999 K08999; uncharacterized protein -10.578154 0.0019780
K19165 phd; antitoxin Phd -10.866440 0.0000000
K09474 phoN; acid phosphatase (class A) -11.094953 0.0001973

Saliva vs saliva_euk

Negative fold changes are higher in saliva samples while positive are higher in saliva_euk samples.

ss = kos.loc[:, ['Saliva_Saliva_euk_FC', 'Saliva_Saliva_euk_p']]
ss = ss[ss['Saliva_Saliva_euk_p'] < 0.01]
ss = ss.sort_values(by=['Saliva_Saliva_euk_FC'], ascending=False).rename(columns={'Saliva_Saliva_euk_FC':'Log2 fold change', 'Saliva_Saliva_euk_p':'p'})
ss = pd.concat([ss.head(n=50), ss.tail(n=50)])
ss.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$ss %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
K18177 COA4; cytochrome c oxidase assembly factor 4 4.712745 0.0076997
K10435 MAP1LC; microtubule-associated protein 1 light chain 1.637279 0.0094501
K00889 PIP5K; 1-phosphatidylinositol-4-phosphate 5-kinase 1.423017 0.0089829
K02898 RP-L26e, RPL26; large subunit ribosomal protein L26e 1.194339 0.0089101
K02875 RP-L14e, RPL14; large subunit ribosomal protein L14e 1.177381 0.0021542
K12315 ACTG2; actin, gamma-enteric smooth muscle 1.132975 0.0045303
K11842 USP12_46; ubiquitin carboxyl-terminal hydrolase 17137 1.012306 0.0099154
K00802 SMS; spermine synthase -1.037657 0.0037952
K08961 K08961; chondroitin-sulfate-ABC endolyase/exolyase -1.046008 0.0061217
K02877 RP-L15e, RPL15; large subunit ribosomal protein L15e -1.100619 0.0041806
K02872 RP-L13Ae, RPL13A; large subunit ribosomal protein L13Ae -1.226700 0.0076598
K03879 ND2; NADH-ubiquinone oxidoreductase chain 2 -1.363466 0.0006745
K18041 PTP4A; protein tyrosine phosphatase type IVA -1.384276 0.0079469
K02261 COX2; cytochrome c oxidase subunit 2 -1.401805 0.0090004
K00412 CYTB, petB; ubiquinol-cytochrome c reductase cytochrome b subunit -1.411037 0.0099857
K03881 ND4; NADH-ubiquinone oxidoreductase chain 4 -1.451248 0.0012696
K02262 COX3; cytochrome c oxidase subunit 3 -1.486945 0.0005570
K00595 cobL; precorrin-6Y C5,15-methyltransferase (decarboxylating) -1.516774 0.0056848
K02126 ATPeF0A, MTATP6, ATP6; F-type H+-transporting ATPase subunit a -1.520625 0.0021185
K02256 COX1; cytochrome c oxidase subunit 1 -1.525916 0.0004332
K02891 RP-L22e, RPL22; large subunit ribosomal protein L22e -1.569902 0.0029241
K03883 ND5; NADH-ubiquinone oxidoreductase chain 5 -1.591861 0.0007432
K08659 pepDA, pepDB; dipeptidase -1.592450 0.0043681
K06217 phoH, phoL; phosphate starvation-inducible protein PhoH and related proteins -1.617955 0.0089621
K01531 mgtA, mgtB; P-type Mg2+ transporter -1.622132 0.0069774
K12554 murN; alanine adding enzyme -1.624233 0.0094830
K03767 PPIA; peptidyl-prolyl cis-trans isomerase A (cyclophilin A) -1.629183 0.0028230
K05832 ABC.X4.P; putative ABC transport system permease protein -1.639911 0.0044330
K00887 dgkA; undecaprenol kinase -1.644084 0.0016521
K07089 K07089; uncharacterized protein -1.653332 0.0027437
K01607 pcaC; 4-carboxymuconolactone decarboxylase -1.655238 0.0008895
K15634 gpmB; probable phosphoglycerate mutase -1.666755 0.0088832
K00549 metE; 5-methyltetrahydropteroyltriglutamate–homocysteine methyltransferase -1.693720 0.0059683
K02121 ATPVE, ntpE, atpE; V/A-type H+/Na+-transporting ATPase subunit E -1.705143 0.0083363
K00376 nosZ; nitrous-oxide reductase -1.714774 0.0084832
K10716 kch, trkA, mthK, pch; voltage-gated potassium channel -1.732670 0.0090649
K10440 rbsC; ribose transport system permease protein -1.736763 0.0057239
K01207 nagZ; beta-N-acetylhexosaminidase -1.737236 0.0091096
K13019 wbpI, wlbD; UDP-GlcNAc3NAcA epimerase -1.737515 0.0090235
K07718 yesM; two-component system, sensor histidine kinase YesM -1.738204 0.0029263
K11051 ABC-2.CYL.P, cylB; multidrug/hemolysin transport system permease protein -1.740461 0.0090891
K01226 treC; trehalose-6-phosphate hydrolase -1.745998 0.0096241
K03930 estA; putative tributyrin esterase -1.755111 0.0087738
K01635 lacD; tagatose 1,6-diphosphate aldolase -1.763753 0.0022842
K10439 rbsB; ribose transport system substrate-binding protein -1.769325 0.0046605
K19302 bcrC; undecaprenyl-diphosphatase -1.781207 0.0098837
K00016 LDH, ldh; L-lactate dehydrogenase -1.795384 0.0035589
K02030 ABC.PA.S; polar amino acid transport system substrate-binding protein -1.795685 0.0096538
K02500 hisF; imidazole glycerol-phosphate synthase subunit HisF -1.809006 0.0045947
K06122 dhbE; glycerol dehydratase small subunit -1.811703 0.0010157
K16345 xanP; xanthine permease XanP -5.273228 0.0000070
K10843 ERCC3, XPB; DNA excision repair protein ERCC-3 -5.303558 0.0080761
K03668 hslJ; heat shock protein HslJ -5.366141 0.0015410
K01713 pheC; cyclohexadienyl dehydratase -5.421247 0.0000006
K05563 phaF; multicomponent K+:H+ antiporter subunit F -5.475315 0.0059606
K03690 ubiJ; ubiquinone biosynthesis protein UbiJ -5.479267 0.0002922
K02062 thiQ; thiamine transport system ATP-binding protein -5.485549 0.0067886
K19046 casB, cse2; CRISPR system Cascade subunit CasB -5.526899 0.0000001
K01560 E3.8.1.2; 2-haloacid dehalogenase -5.553855 0.0008261
K00184 K00184; prokaryotic molybdopterin-containing oxidoreductase family, iron-sulfur binding subunit -5.554969 0.0000020
K15085 SLC25A42; solute carrier family 25, member 42 -5.555635 0.0000546
K00023 phbB; acetoacetyl-CoA reductase -5.564308 0.0027237
K07498 K07498; putative transposase -5.634280 0.0023036
K19506 PTS-Gfr-EIIA, gfrA; PTS system, fructoselysine/glucoselysine-specific IIA component -5.752383 0.0003501
K12990 rfbF, rhlC; rhamnosyltransferase -5.758307 0.0000971
K06988 fno; 8-hydroxy-5-deazaflavin:NADPH oxidoreductase -5.771986 0.0000954
K06996 K06996; uncharacterized protein -5.816659 0.0003177
K16237 aroP; aromatic amino acid permease -5.843438 0.0072962
K00392 sir; sulfite reductase (ferredoxin) -5.853665 0.0057438
K19507 PTS-Gfr-EIIB, gfrB; PTS system, fructoselysine/glucoselysine-specific IIB component -5.866405 0.0000158
K07775 resD; two-component system, OmpR family, response regulator ResD -5.922751 0.0004437
K02153 ATPeV0E, ATP6H; V-type H+-transporting ATPase subunit e -5.944180 0.0081414
K16651 pduX; L-threonine kinase -5.947723 0.0066696
K00613 GATM; glycine amidinotransferase -5.947723 0.0069408
K01198 xynB; xylan 1,4-beta-xylosidase -5.968610 0.0025171
K04032 eutT; ethanolamine utilization cobalamin adenosyltransferase -5.989700 0.0000007
K11003 hlyD, cyaD; hemolysin D -6.010637 0.0055756
K19167 abiQ; protein AbiQ -6.140900 0.0000001
K02052 ABC.SP.A; putative spermidine/putrescine transport system ATP-binding protein -6.158652 0.0011138
K01160 rusA; crossover junction endodeoxyribonuclease RusA -6.237059 0.0006660
K03855 fixX; ferredoxin like protein -6.252921 0.0000389
K17947 wbiB; dTDP-L-rhamnose 4-epimerase -6.270065 0.0000005
K00318 PRODH, fadM, putB; proline dehydrogenase -6.281877 0.0063591
K09138 K09138; uncharacterized protein -6.313006 0.0054260
K03577 acrR, smeT; TetR/AcrR family transcriptional regulator, acrAB operon repressor -6.332311 0.0081894
K19166 higB; mRNA interferase HigB -6.353277 0.0000080
K02275 coxB, ctaC; cytochrome c oxidase subunit II -6.363340 0.0065516
K00590 E2.1.1.113; site-specific DNA-methyltransferase (cytosine-N4-specific) -6.370443 0.0024593
K12795 SUGT1, SGT1; suppressor of G2 allele of SKP1 -6.378740 0.0000002
K07275 ompW; outer membrane protein -6.395978 0.0000073
K19180 tll; dTDP-6-deoxy-L-talose 4-dehydrogenase (NAD+) -6.531763 0.0073856
K01985 5SrRNA, rrf; 5S ribosomal RNA -6.627921 0.0013100
K03828 yjgM; putative acetyltransferase -6.629823 0.0053083
K07045 K07045; uncharacterized protein -6.808462 0.0018988
K14989 salR; two-component system, NarL family, secretion system response regulator SalR -7.160810 0.0048519
K03817 rimL; ribosomal-protein-serine acetyltransferase -7.180745 0.0081051
K01430 ureA; urease subunit gamma -7.241866 0.0032330
K10778 ada; AraC family transcriptional regulator, regulatory protein of adaptative response / methylated-DNA- -7.312382 0.0077404
K12415 comC; competence-stimulating peptide -7.473230 0.0000189
K02039 phoU; phosphate transport system protein -7.543947 0.0066958

Buccal vs buccal_euk

Negative fold changes are higher in buccal samples while positive are higher in buccal_euk samples.

bbe = kos.loc[:, ['Buccal_Buccal_euk_FC', 'Buccal_Buccal_euk_p']]
bbe = bbe[bbe['Buccal_Buccal_euk_p'] < 0.01]
bbe = bbe.sort_values(by=['Buccal_Buccal_euk_FC'], ascending=False).rename(columns={'Buccal_Buccal_euk_FC':'Log2 fold change', 'Buccal_Buccal_euk_p':'p'})
bbe = pd.concat([bbe.head(n=50), bbe.tail(n=50)])
bbe = bbe.groupby(by=bbe.index, axis=0).mean()
bbe.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bbe %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
K03232 EEF1B; elongation factor 1-beta 1.467244 0.0057926
K17392 IGF2BP2; insulin-like growth factor 2 mRNA-binding protein 2 1.414129 0.0094551
K06210 NMNAT; nicotinamide mononucleotide adenylyltransferase 1.148057 0.0004174
K04496 CTBP; C-terminal binding protein 1.080461 0.0033637
K03960 NDUFB4; NADH dehydrogenase (ubiquinone) 1 beta subcomplex subunit 4 1.015066 0.0022632
K15410 EEF1D; elongation factor 1-delta -1.280323 0.0041850
K02993 RP-S7e, RPS7; small subunit ribosomal protein S7e -1.284215 0.0031800
K16494 PCDHB; protocadherin beta -1.541813 0.0019962
K05840 DRD5; dopamine receptor D5 -1.566770 0.0000811
K04079 HSP90A, htpG; molecular chaperone HtpG -1.742817 0.0031099
K06911 K06911; uncharacterized protein -2.045255 0.0087367
K02256 COX1; cytochrome c oxidase subunit 1 -2.094969 0.0000953
K03881 ND4; NADH-ubiquinone oxidoreductase chain 4 -2.293579 0.0000154
K03884 ND6; NADH-ubiquinone oxidoreductase chain 6 -2.299784 0.0002496
K02261 COX2; cytochrome c oxidase subunit 2 -2.316261 0.0000849
K03879 ND2; NADH-ubiquinone oxidoreductase chain 2 -2.426515 0.0001048
K02262 COX3; cytochrome c oxidase subunit 3 -2.431236 0.0002102
K03878 ND1; NADH-ubiquinone oxidoreductase chain 1 -2.438527 0.0001136
K00412 CYTB, petB; ubiquinol-cytochrome c reductase cytochrome b subunit -2.444177 0.0000958
K02126 ATPeF0A, MTATP6, ATP6; F-type H+-transporting ATPase subunit a -2.503513 0.0000047
K03883 ND5; NADH-ubiquinone oxidoreductase chain 5 -2.582554 0.0000143
K03880 ND3; NADH-ubiquinone oxidoreductase chain 3 -2.677270 0.0003982
K10439 rbsB; ribose transport system substrate-binding protein -2.687404 0.0092403
K02881 RP-L18, MRPL18, rplR; large subunit ribosomal protein L18 -2.748077 0.0051131
K03882 ND4L; NADH-ubiquinone oxidoreductase chain 4L -2.765365 0.0000193
K00950 folK; 2-amino-4-hydroxy-6-hydroxymethyldihydropteridine diphosphokinase -2.802235 0.0021672
K03559 exbD; biopolymer transport protein ExbD -3.012558 0.0031365
K03218 rlmB; 23S rRNA (guanosine2251-2’-O)-methyltransferase -3.116803 0.0095932
K03925 mraZ; MraZ protein -3.154068 0.0097584
K03319 TC.DASS; divalent anion:Na+ symporter, DASS family -3.195524 0.0053183
K18928 lldE; L-lactate dehydrogenase complex protein LldE -3.294311 0.0023776
K08610 ADAM21; disintegrin and metalloproteinase domain-containing protein 21 -3.340978 0.0000233
K16741 ALMS1; alstrom syndrome protein 1 -3.356812 0.0001533
K02517 lpxL, htrB; Kdo2-lipid IVA lauroyltransferase/acyltransferase -3.404739 0.0054749
K00782 lldG; L-lactate dehydrogenase complex protein LldG -3.419871 0.0035777
K06281 hyaB, hybC; hydrogenase large subunit -3.429246 0.0090692
K03723 mfd; transcription-repair coupling factor (superfamily II helicase) -3.464216 0.0081496
K00525 E1.17.4.1A, nrdA, nrdE; ribonucleoside-diphosphate reductase alpha chain -3.500904 0.0060818
K03596 lepA; GTP-binding protein LepA -3.522567 0.0099846
K05786 rarD; chloramphenicol-sensitive protein RarD -3.534048 0.0062804
K01207 nagZ; beta-N-acetylhexosaminidase -3.539285 0.0051537
K04290 S1PR3, EDG3; sphingosine 1-phosphate receptor 3 -3.577329 0.0000749
K01610 E4.1.1.49, pckA; phosphoenolpyruvate carboxykinase (ATP) -3.666843 0.0014867
K03496 parA, soj; chromosome partitioning protein -3.755157 0.0065088
K00831 serC, PSAT1; phosphoserine aminotransferase -3.811166 0.0002465
K01911 menE; O-succinylbenzoic acid—CoA ligase -3.909487 0.0088855
K01753 dsdA; D-serine dehydratase -3.974443 0.0000856
K12437 pks13; polyketide synthase 13 -4.015661 0.0011796
K03629 recF; DNA replication and repair protein RecF -4.034093 0.0051844
K16090 fiu; catecholate siderophore receptor -4.160175 0.0000637
K02000 proV; glycine betaine/proline transport system ATP-binding protein -8.436459 0.0000000
K09117 K09117; uncharacterized protein -8.481400 0.0000214
K03773 fklB; FKBP-type peptidyl-prolyl cis-trans isomerase FklB -8.510498 0.0059909
K00335 nuoF; NADH-quinone oxidoreductase subunit F -8.514234 0.0012381
K07776 regX3; two-component system, OmpR family, response regulator RegX3 -8.536497 0.0043416
K04516 AROA1, aroA; chorismate mutase -8.558918 0.0000679
K00012 UGDH, ugd; UDPglucose 6-dehydrogenase -8.582749 0.0088989
K04095 fic; cell filamentation protein -8.586069 0.0000049
K03071 secB; preprotein translocase subunit SecB -8.616858 0.0086368
K10119 msmG; raffinose/stachyose/melibiose transport system permease protein -8.635010 0.0074487
K02075 ABC.ZM.P; zinc/manganese transport system permease protein -8.649968 0.0086134
K02217 ftnA, ftn; ferritin -8.659756 0.0058665
K03297 emrE, qac, mmr, smr; small multidrug resistance pump -8.674286 0.0008830
K07481 K07481; transposase, IS5 family -8.686355 0.0008838
K02065 mlaF, linL, mkl; phospholipid/cholesterol/gamma-HCH transport system ATP-binding protein -8.699583 0.0009880
K02007 cbiM; cobalt/nickel transport system permease protein -8.814449 0.0000073
K09890 arfA; alternative ribosome-rescue factor -8.835224 0.0063704
K03797 E3.4.21.102, prc, ctpA; carboxyl-terminal processing protease -8.835284 0.0037340
K00348 nqrC; Na+-transporting NADH:ubiquinone oxidoreductase subunit C -8.838146 0.0087200
K03719 lrp; Lrp/AsnC family transcriptional regulator, leucine-responsive regulatory protein -8.845129 0.0000017
K02536 lpxD; UDP-3-O- -8.856674 0.0000006
K07084 yuiF; putative amino acid transporter -8.864766 0.0029329
K13256 psiE; protein PsiE -8.872480 0.0050807
K09765 queH; epoxyqueuosine reductase -8.893468 0.0061978
K00647 fabB; 3-oxoacyl- -8.911359 0.0014707
K01872 AARS, alaS; alanyl-tRNA synthetase -8.936109 0.0013203
K07571 K07571; S1 RNA binding domain protein -8.972784 0.0000418
K02500 hisF; imidazole glycerol-phosphate synthase subunit HisF -8.994848 0.0080137
K00275 pdxH, PNPO; pyridoxamine 5’-phosphate oxidase -9.003445 0.0000003
K00677 lpxA; UDP-N-acetylglucosamine acyltransferase -9.009434 0.0000036
K15580 oppA, mppA; oligopeptide transport system substrate-binding protein -9.039172 0.0053359
K05595 marC; multiple antibiotic resistance protein -9.059555 0.0013466
K08234 yaeR; glyoxylase I family protein -9.061412 0.0066773
K10009 tcyB, yecS; L-cystine transport system permease protein -9.069022 0.0046980
K09791 K09791; uncharacterized protein -9.197084 0.0000262
K03684 rnd; ribonuclease D -9.310103 0.0034952
K00024 mdh; malate dehydrogenase -9.378307 0.0018311
K01627 kdsA; 2-dehydro-3-deoxyphosphooctonate aldolase (KDO 8-P synthase) -9.391367 0.0096899
K02032 ABC.PE.A1; peptide/nickel transport system ATP-binding protein -9.395496 0.0022133
K07075 K07075; uncharacterized protein -9.406457 0.0000642
K06891 clpS; ATP-dependent Clp protease adaptor protein ClpS -9.419182 0.0000077
K02348 elaA; ElaA protein -9.447487 0.0046015
K02803 PTS-Nag-EIIB, nagE; PTS system, N-acetylglucosamine-specific IIB component -9.454598 0.0016069
K03569 mreB; rod shape-determining protein MreB and related proteins -9.496033 0.0028899
K03924 moxR; MoxR-like ATPase -9.509025 0.0061251
K07053 E3.1.3.97; 3’,5’-nucleoside bisphosphate phosphatase -9.753894 0.0071143
K09794 K09794; uncharacterized protein -9.836549 0.0000028
K07507 mgtC; putative Mg2+ transporter-C (MgtC) family protein -10.261910 0.0055478
K06925 tsaE; tRNA threonylcarbamoyladenosine biosynthesis protein TsaE -10.825951 0.0032855
K07166 K07166; ACT domain-containing protein -11.318376 0.0081144

Blood vs saliva_euk

Negative fold changes are higher in blood samples while positive are higher in saliva_euk samples.

blbe = kos.loc[:, ['Blood_Saliva_euk_FC', 'Blood_Saliva_euk_p']]
blbe = blbe[blbe['Blood_Saliva_euk_p'] < 0.01]
blbe = blbe.sort_values(by=['Blood_Saliva_euk_FC'], ascending=False).rename(columns={'Blood_Saliva_euk_FC':'Log2 fold change', 'Blood_Saliva_euk_p':'p'})
blbe = pd.concat([blbe.head(n=50), blbe.tail(n=50)])
blbe = blbe.groupby(by=blbe.index, axis=0).mean()
blbe.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$blbe %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
K02913 RP-L33, MRPL33, rpmG; large subunit ribosomal protein L33 15.8599627 0.0000000
K02914 RP-L34, MRPL34, rpmH; large subunit ribosomal protein L34 14.4043152 0.0000045
K02078 acpP; acyl carrier protein 13.5786127 0.0000004
K02970 RP-S21, MRPS21, rpsU; small subunit ribosomal protein S21 13.5595018 0.0001025
K02518 infA; translation initiation factor IF-1 13.4822104 0.0000008
K02911 RP-L32, MRPL32, rpmF; large subunit ribosomal protein L32 13.4537152 0.0000097
K01992 ABC-2.P; ABC-2 type transport system permease protein 13.4439104 0.0000026
K03530 hupB; DNA-binding protein HU-beta 13.4266761 0.0000025
K02907 RP-L30, MRPL30, rpmD; large subunit ribosomal protein L30 13.3759103 0.0000811
K02902 RP-L28, MRPL28, rpmB; large subunit ribosomal protein L28 13.3749776 0.0000062
K02916 RP-L35, MRPL35, rpmI; large subunit ribosomal protein L35 13.3685308 0.0000002
K01990 ABC-2.A; ABC-2 type transport system ATP-binding protein 13.3105462 0.0000021
K03073 secE; preprotein translocase subunit SecE 13.3041245 0.0000081
K02909 RP-L31, rpmE; large subunit ribosomal protein L31 13.2838540 0.0000045
K02965 RP-S19, rpsS; small subunit ribosomal protein S19 13.1295251 0.0000008
K02961 RP-S17, MRPS17, rpsQ; small subunit ribosomal protein S17 13.1229086 0.0015489
K02899 RP-L27, MRPL27, rpmA; large subunit ribosomal protein L27 13.1228108 0.0000027
K02963 RP-S18, MRPS18, rpsR; small subunit ribosomal protein S18 13.1077884 0.0034733
K02968 RP-S20, rpsT; small subunit ribosomal protein S20 13.1014287 0.0000357
K07496 K07496; putative transposase 13.0494535 0.0002718
K07024 SPP; sucrose-6-phosphatase 13.0489273 0.0000011
K02013 ABC.FEV.A; iron complex transport system ATP-binding protein 13.0228458 0.0000024
K02946 RP-S10, MRPS10, rpsJ; small subunit ribosomal protein S10 12.9606831 0.0000051
K02935 RP-L7, MRPL12, rplL; large subunit ribosomal protein L7/L12 12.9321671 0.0000059
K02003 ABC.CD.A; putative ABC transport system ATP-binding protein 12.8907924 0.0000058
K02954 RP-S14, MRPS14, rpsN; small subunit ribosomal protein S14 12.8857555 0.0000060
K02881 RP-L18, MRPL18, rplR; large subunit ribosomal protein L18 12.8791002 0.0000032
K02888 RP-L21, MRPL21, rplU; large subunit ribosomal protein L21 12.8509584 0.0000069
K06142 hlpA, ompH; outer membrane protein 12.8497520 0.0000007
K02948 RP-S11, MRPS11, rpsK; small subunit ribosomal protein S11 12.8486413 0.0000027
K02887 RP-L20, MRPL20, rplT; large subunit ribosomal protein L20 12.8465529 0.0000059
K02876 RP-L15, MRPL15, rplO; large subunit ribosomal protein L15 12.8284344 0.0000062
K02956 RP-S15, MRPS15, rpsO; small subunit ribosomal protein S15 12.8000670 0.0000357
K02959 RP-S16, MRPS16, rpsP; small subunit ribosomal protein S16 12.7890104 0.0000324
K02996 RP-S9, MRPS9, rpsI; small subunit ribosomal protein S9 12.7888721 0.0000007
K02950 RP-S12, MRPS12, rpsL; small subunit ribosomal protein S12 12.7788152 0.0000084
K02879 RP-L17, MRPL17, rplQ; large subunit ribosomal protein L17 12.7729633 0.0028441
K02884 RP-L19, MRPL19, rplS; large subunit ribosomal protein L19 12.7655111 0.0000007
K02952 RP-S13, rpsM; small subunit ribosomal protein S13 12.7558847 0.0000334
K02994 RP-S8, rpsH; small subunit ribosomal protein S8 12.7486487 0.0000004
K01358 clpP, CLPP; ATP-dependent Clp protease, protease subunit 12.7150029 0.0000057
K03496 parA, soj; chromosome partitioning protein 12.7090303 0.0000130
K02878 RP-L16, MRPL16, rplP; large subunit ribosomal protein L16 12.6748519 0.0000017
K02890 RP-L22, MRPL22, rplV; large subunit ribosomal protein L22 12.6714513 0.0000177
K02904 RP-L29, rpmC; large subunit ribosomal protein L29 12.6586979 0.0000430
K06147 ABCB-BAC; ATP-binding cassette, subfamily B, bacterial 12.6575291 0.0000035
K02931 RP-L5, MRPL5, rplE; large subunit ribosomal protein L5 12.6275570 0.0000019
K02015 ABC.FEV.P; iron complex transport system permease protein 12.6193624 0.0000018
K03088 rpoE; RNA polymerase sigma-70 factor, ECF subfamily 12.6135742 0.0000013
K02867 RP-L11, MRPL11, rplK; large subunit ribosomal protein L11 12.6094849 0.0000105
K04074 divIVA; cell division initiation protein 5.7454479 0.0004852
K01541 5.7154325 0.0000648
K02817 PTS-Tre-EIIA, treP; PTS system, trehalose-specific IIA component 5.7098664 0.0000062
K12172 RANBP2, NUP358; E3 SUMO-protein ligase RanBP2 5.6757778 0.0000068
K03582 recB; exodeoxyribonuclease V beta subunit 5.6751309 0.0002133
K09529 DNAJC9; DnaJ homolog subfamily C member 9 5.6736436 0.0000001
K04313 GPR6; G protein-coupled receptor 6 5.6672245 0.0000146
K04683 TFDP1; transcription factor Dp-1 5.6427218 0.0000086
K00750 GYG1, GYG2; glycogenin 5.6013967 0.0000001
K11842 USP12_46; ubiquitin carboxyl-terminal hydrolase 17137 5.5921757 0.0000001
K00121 frmA, ADH5, adhC; S-(hydroxymethyl)glutathione dehydrogenase / alcohol dehydrogenase 5.5885080 0.0000365
K19083 braD, bceA; bacitracin transport system ATP-binding protein 5.5274928 0.0001288
K17455 FSCN2; fascin2 5.5021798 0.0000008
K01209 abfA; alpha-L-arabinofuranosidase 5.4857734 0.0000004
K13197 IGF2BP3; insulin-like growth factor 2 mRNA-binding protein 3 5.4571779 0.0000002
K01101 E3.1.3.41; 4-nitrophenyl phosphatase 5.3807031 0.0000120
K03095 sprL; SprT-like protein 5.3676058 0.0000007
K11090 LA, SSB; lupus La protein 5.3458764 0.0000129
K00731 C1GALT1; glycoprotein-N-acetylgalactosamine 3-beta-galactosyltransferase 5.3326491 0.0000413
K02265 COX5B; cytochrome c oxidase subunit 5b 5.3323070 0.0000053
K04288 S1PR1, EDG1, CD363; sphingosine 1-phosphate receptor 1 5.2652232 0.0000008
K17274 S100A10; calpactin-1 light chain 5.2565093 0.0000327
K17391 IGF2BP1; insulin-like growth factor 2 mRNA-binding protein 1 5.2516136 0.0001783
K06053 RBPSUH, RBPJK; recombining binding protein suppressor of hairless 5.1890617 0.0000017
K12755 MYL9; myosin regulatory light chain 9 5.0889277 0.0000783
K04875 KCNA2, KV1.2; potassium voltage-gated channel Shaker-related subfamily A member 2 5.0600541 0.0000005
K11904 vgrG; type VI secretion system secreted protein VgrG 5.0367433 0.0035040
K07986 ULBP; UL16 binding protein 4.9550450 0.0003358
K10752 RBBP4, HAT2, CAF1, MIS16; histone-binding protein RBBP4 4.9425193 0.0000015
K08643 zmpB; zinc metalloprotease ZmpB 4.9348739 0.0011437
K19611 fepA, pfeA, iroN, pirA; ferric enterobactin receptor 4.7953583 0.0000419
K17624 engCP, engBF, endoEF; endo-alpha-N-acetylgalactosaminidase 4.6651482 0.0006807
K06727 FCRL, IRTA, CD307; Fc receptor-like protein 4.6457501 0.0000708
K04874 KCNA1, KV1.1; potassium voltage-gated channel Shaker-related subfamily A member 1 4.6189248 0.0000001
K14682 argAB; amino-acid N-acetyltransferase 4.5573539 0.0014321
K04876 KCNA3, KV1.3; potassium voltage-gated channel Shaker-related subfamily A member 3 4.5167626 0.0000083
K04292 S1PR2, EDG5; sphingosine 1-phosphate receptor 2 4.2912577 0.0002126
K02001 proW; glycine betaine/proline transport system permease protein 4.1421000 0.0005923
K01390 iga; IgA-specific metalloendopeptidase 4.0510644 0.0036671
K00134 GAPDH, gapA; glyceraldehyde 3-phosphate dehydrogenase 3.9642023 0.0009294
K01940 argG, ASS1; argininosuccinate synthase 2.7965061 0.0076286
K03767 PPIA; peptidyl-prolyl cis-trans isomerase A (cyclophilin A) 1.4732279 0.0039824
K05863 SLC25A4S, ANT; solute carrier family 25 (mitochondrial adenine nucleotide translocator), member 4/5/6/31 -0.8221195 0.0030452
K02261 COX2; cytochrome c oxidase subunit 2 -1.3314744 0.0099234
K03879 ND2; NADH-ubiquinone oxidoreductase chain 2 -1.3379217 0.0020646
K02262 COX3; cytochrome c oxidase subunit 3 -1.4490693 0.0005670
K02256 COX1; cytochrome c oxidase subunit 1 -1.5164495 0.0005461
K03878 ND1; NADH-ubiquinone oxidoreductase chain 1 -1.6760517 0.0034799
K03881 ND4; NADH-ubiquinone oxidoreductase chain 4 -1.8265865 0.0027652
K03883 ND5; NADH-ubiquinone oxidoreductase chain 5 -1.8764987 0.0010947

Blood vs buccal_euk

Negative fold changes are higher in blood samples while positive are higher in buccal_euk samples.

blse = kos.loc[:, ['Blood_Buccal_euk_FC', 'Blood_Buccal_euk_p']]
blse = blse[blse['Blood_Buccal_euk_p'] < 0.01]
blse = blse.sort_values(by=['Blood_Buccal_euk_FC'], ascending=False).rename(columns={'Blood_Buccal_euk_FC':'Log2 fold change', 'Blood_Buccal_euk_p':'p'})
blse = pd.concat([blse.head(n=50), blse.tail(n=50)])
blse = blse.groupby(by=blse.index, axis=0).mean()
blse.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$blse %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
K02913 RP-L33, MRPL33, rpmG; large subunit ribosomal protein L33 16.1818093 0.0002471
K01992 ABC-2.P; ABC-2 type transport system permease protein 11.5128420 0.0000618
K03574 mutT, NUDT15, MTH2; 8-oxo-dGTP diphosphatase 10.9861718 0.0001413
K02904 RP-L29, rpmC; large subunit ribosomal protein L29 10.8776580 0.0000007
K01756 purB, ADSL; adenylosuccinate lyase 10.8536183 0.0005353
K07024 SPP; sucrose-6-phosphatase 10.7967873 0.0005105
K02954 RP-S14, MRPS14, rpsN; small subunit ribosomal protein S14 10.6293239 0.0000120
K02518 infA; translation initiation factor IF-1 10.3714855 0.0000136
K02078 acpP; acyl carrier protein 10.3023185 0.0000021
K01104 E3.1.3.48; protein-tyrosine phosphatase 10.0810563 0.0000283
K03075 secG; preprotein translocase subunit SecG 10.0686858 0.0000066
K00948 PRPS, prsA; ribose-phosphate pyrophosphokinase 10.0608984 0.0000517
K08998 K08998; uncharacterized protein 10.0573727 0.0001643
K03499 trkA, ktrA; trk system potassium uptake protein 10.0058758 0.0000700
K02950 RP-S12, MRPS12, rpsL; small subunit ribosomal protein S12 9.9877413 0.0001515
K16787 ecfA2; energy-coupling factor transport system ATP-binding protein 9.9691833 0.0001840
K02372 fabZ; 3-hydroxyacyl- 9.9637621 0.0000294
K00939 adk, AK; adenylate kinase 9.9595009 0.0078494
K02871 RP-L13, MRPL13, rplM; large subunit ribosomal protein L13 9.8753227 0.0000010
K02015 ABC.FEV.P; iron complex transport system permease protein 9.8667187 0.0002448
K02440 GLPF; glycerol uptake facilitator protein 9.8408240 0.0001011
K04564 SOD2; superoxide dismutase, Fe-Mn family 9.7810834 0.0000055
K11991 tadA; tRNA(adenine34) deaminase 9.7639518 0.0000219
K02881 RP-L18, MRPL18, rplR; large subunit ribosomal protein L18 9.7624176 0.0000000
K06199 crcB, FEX; fluoride exporter 9.7233258 0.0000191
K00928 lysC; aspartate kinase 9.7065719 0.0003610
K02886 RP-L2, MRPL2, rplB; large subunit ribosomal protein L2 9.6906968 0.0000908
K03073 secE; preprotein translocase subunit SecE 9.6729322 0.0003735
K01738 cysK; cysteine synthase 9.6561421 0.0000015
K00790 murA; UDP-N-acetylglucosamine 1-carboxyvinyltransferase 9.6408742 0.0002500
K09710 ybeB; ribosome-associated protein 9.6404415 0.0000461
K01246 tag; DNA-3-methyladenine glycosylase I 9.6303301 0.0000045
K04077 groEL, HSPD1; chaperonin GroEL 9.5891273 0.0047969
K01915 glnA, GLUL; glutamine synthetase 9.5824446 0.0000279
K01933 purM; phosphoribosylformylglycinamidine cyclo-ligase 9.5736189 0.0002323
K03687 GRPE; molecular chaperone GrpE 9.5729556 0.0000106
K00615 E2.2.1.1, tktA, tktB; transketolase 9.5505026 0.0001070
K02433 gatA, QRSL1; aspartyl-tRNA(Asn)/glutamyl-tRNA(Gln) amidotransferase subunit A 9.5442794 0.0008482
K01923 purC; phosphoribosylaminoimidazole-succinocarboxamide synthase 9.5357375 0.0000074
K02338 dnaN; DNA polymerase III subunit beta 9.5332006 0.0001710
K02992 RP-S7, MRPS7, rpsG; small subunit ribosomal protein S7 9.5083188 0.0002118
K00820 glmS, GFPT; glutamine—fructose-6-phosphate transaminase (isomerizing) 9.5003694 0.0001099
K02996 RP-S9, MRPS9, rpsI; small subunit ribosomal protein S9 9.4773279 0.0000790
K03671 trxA; thioredoxin 1 9.4758376 0.0000215
K01687 ilvD; dihydroxy-acid dehydratase 9.4662150 0.0003556
K03708 ctsR; transcriptional regulator of stress and heat shock response 9.4498962 0.0000162
K02314 dnaB; replicative DNA helicase 9.4429692 0.0002617
K01963 accD; acetyl-CoA carboxylase carboxyl transferase subunit beta 9.4244850 0.0001347
K07271 licD; lipopolysaccharide cholinephosphotransferase 9.4218430 0.0002400
K02837 prfC; peptide chain release factor 3 9.3918449 0.0002680
K03284 corA; magnesium transporter 7.1522003 0.0001015
K01661 menB; naphthoate synthase 7.1493059 0.0000647
K01647 CS, gltA; citrate synthase 7.1195886 0.0000067
K18928 lldE; L-lactate dehydrogenase complex protein LldE 7.0983073 0.0000061
K12757 MYL12; myosin regulatory light chain 12 7.0955579 0.0000007
K05840 DRD5; dopamine receptor D5 7.0636557 0.0000000
K08857 NEK1_4_5; NIMA (never in mitosis gene a)-related kinase 38443 7.0589315 0.0000035
K03466 ftsK, spoIIIE; DNA segregation ATPase FtsK/SpoIIIE, S-DNA-T family 7.0565895 0.0008980
K03960 NDUFB4; NADH dehydrogenase (ubiquinone) 1 beta subcomplex subunit 4 7.0253055 0.0000000
K03639 moaA, CNX2; GTP 3’,8-cyclase 7.0054202 0.0000799
K00782 lldG; L-lactate dehydrogenase complex protein LldG 7.0019641 0.0000004
K14455 GOT2; aspartate aminotransferase, mitochondrial 6.9945243 0.0000001
K03629 recF; DNA replication and repair protein RecF 6.9883502 0.0000612
K00653 CDY; chromodomain protein Y 6.9102327 0.0000007
K05786 rarD; chloramphenicol-sensitive protein RarD 6.9046115 0.0000255
K00371 narH, narY, nxrB; nitrate reductase / nitrite oxidoreductase, beta subunit 6.8665611 0.0001685
K03547 sbcD, mre11; DNA repair protein SbcD/Mre11 6.8586688 0.0000767
K02342 dnaQ; DNA polymerase III subunit epsilon 6.7129347 0.0000956
K00525 E1.17.4.1A, nrdA, nrdE; ribonucleoside-diphosphate reductase alpha chain 6.7013656 0.0000661
K17455 FSCN2; fascin2 6.6408974 0.0000088
K00240 sdhB, frdB; succinate dehydrogenase / fumarate reductase, iron-sulfur subunit 6.6270061 0.0013377
K10343 SPSB1_4, SSB1, SSB4; SPRY domain-containing SOCS box protein 43922 6.5750024 0.0000001
K03723 mfd; transcription-repair coupling factor (superfamily II helicase) 6.5721780 0.0000566
K01778 dapF; diaminopimelate epimerase 6.5471812 0.0000360
K01207 nagZ; beta-N-acetylhexosaminidase 6.4067020 0.0000378
K01652 E2.2.1.6L, ilvB, ilvG, ilvI; acetolactate synthase I/II/III large subunit 6.3920399 0.0007322
K06911 K06911; uncharacterized protein 6.3133922 0.0000030
K02858 ribB, RIB3; 3,4-dihydroxy 2-butanone 4-phosphate synthase 6.3106401 0.0003793
K03527 ispH, lytB; 4-hydroxy-3-methylbut-2-en-1-yl diphosphate reductase 6.1477167 0.0002274
K01911 menE; O-succinylbenzoic acid—CoA ligase 6.1373608 0.0000285
K01744 aspA; aspartate ammonia-lyase 6.0818258 0.0002584
K00099 dxr; 1-deoxy-D-xylulose-5-phosphate reductoisomerase 5.9628220 0.0004205
K04875 KCNA2, KV1.2; potassium voltage-gated channel Shaker-related subfamily A member 2 5.9414027 0.0000049
K01610 E4.1.1.49, pckA; phosphoenolpyruvate carboxykinase (ATP) 5.9186660 0.0000373
K12755 MYL9; myosin regulatory light chain 9 5.8986788 0.0000010
K03319 TC.DASS; divalent anion:Na+ symporter, DASS family 5.8744409 0.0000259
K10752 RBBP4, HAT2, CAF1, MIS16; histone-binding protein RBBP4 5.7928033 0.0000036
K02517 lpxL, htrB; Kdo2-lipid IVA lauroyltransferase/acyltransferase 5.7843811 0.0001123
K17392 IGF2BP2; insulin-like growth factor 2 mRNA-binding protein 2 5.7651144 0.0000040
K00831 serC, PSAT1; phosphoserine aminotransferase 5.7394312 0.0000028
K02930 RP-L4e, RPL4; large subunit ribosomal protein L4e 5.6662498 0.0000003
K08301 rng, cafA; ribonuclease G 5.6512400 0.0000924
K06281 hyaB, hybC; hydrogenase large subunit 5.4295802 0.0001948
K04874 KCNA1, KV1.1; potassium voltage-gated channel Shaker-related subfamily A member 1 5.4132320 0.0000080
K03546 sbcC, rad50; DNA repair protein SbcC/Rad50 5.2338079 0.0017231
K16092 btuB; vitamin B12 transporter 4.7102953 0.0017984
K16495 PCDHGA; protocadherin gamma subfamily A 4.5153796 0.0000049
K00134 GAPDH, gapA; glyceraldehyde 3-phosphate dehydrogenase 1.7257634 0.0057157
K06478 PTPRC, CD45; receptor-type tyrosine-protein phosphatase C -0.3261990 0.0020246
K00522 FTH1; ferritin heavy chain -0.9651333 0.0070830

HUMAnN3

As for HUMAnN2 I am now looking at the KEGG orthologs assigned to the Uniref50 database.

kos = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/metaphlan_humann3/genes_ko_50.csv', header=0, index_col=0)
kegg_list = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/Human_metagenome/kegg_list.csv', header=0, index_col=0)
kegg_list.drop_duplicates(inplace=True)
kegg_dict = {}
kl = list(kegg_list.index.values)
for ko in kl:
  kegg_dict[ko] = ko+' '+kegg_list.loc[ko, 'Product']
kos = kos.rename(index=kegg_dict)

Blood vs saliva

Negative fold changes are higher in blood samples while positive are higher in buccal_euk samples.

bs3 = kos.loc[:, ['Saliva_Blood_FC', 'Saliva_Blood_p']]
bs3 = bs3[bs3['Saliva_Blood_p'] < 0.01]
bs3 = bs3.sort_values(by=['Saliva_Blood_FC'], ascending=False).rename(columns={'Saliva_Blood_FC':'Log2 fold change', 'Saliva_Blood_p':'p'})
bs3 = pd.concat([bs3.head(n=50), bs3.tail(n=50)])
bs3 = bs3.groupby(by=bs3.index, axis=0).mean()
bs3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bs3 %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
K02987 RP-S4e, RPS4; small subunit ribosomal protein S4e 2.900731 0.0015733
K05692 ACTB_G1; actin beta/gamma 1 2.742918 0.0005082
K07604 KRT1; type I keratin, acidic 1.975235 0.0047374
K02984 RP-S3Ae, RPS3A; small subunit ribosomal protein S3Ae 1.928509 0.0087034
K19469 FTO; mRNA N6-methyladenine demethylase 1.602431 0.0069474
K03878 ND1; NADH-ubiquinone oxidoreductase chain 1 1.395337 0.0050401
K03879 ND2; NADH-ubiquinone oxidoreductase chain 2 1.334215 0.0001341
K02262 COX3; cytochrome c oxidase subunit 3 1.146687 0.0020483
K03880 ND3; NADH-ubiquinone oxidoreductase chain 3 1.097022 0.0068121
K02256 COX1; cytochrome c oxidase subunit 1 -1.054638 0.0038354
K03881 ND4; NADH-ubiquinone oxidoreductase chain 4 -1.271552 0.0002763
K09206 KLF5; krueppel-like factor 5 -2.551036 0.0013898
K03097 CSNK2A; casein kinase II subunit alpha -2.640297 0.0002630
K07208 RHEB; Ras homolog enriched in brain -2.663001 0.0003144
K09627 PRSS23; serine protease 23 -2.935477 0.0035377
K10571 DET1; de-etiolated-1 -2.937331 0.0036227
K05001 KCNJ8, KIR6.1; potassium inwardly-rectifying channel subfamily J member 8 -3.017643 0.0019871
K08428 GPR139; G protein-coupled receptor 139 -3.127061 0.0008065
K05421 BSF3, CLC; B-cell stimulating factor 3 -3.152678 0.0004729
K04902 KCNG3, KV6.3; potassium voltage-gated channel subfamily G member 3 -3.156789 0.0025693
K17658 TEFM; transcription elongation factor, mitochondrial -3.176972 0.0002493
K11182 AOC1, ABP1; diamine oxidase -3.187546 0.0013948
K06174 ABCE1, Rli1; ATP-binding cassette, sub-family E, member 1 -3.256348 0.0000379
K05002 KCNJ9, KIR3.3; potassium inwardly-rectifying channel subfamily J member 9 -3.257962 0.0060961
K04506 SIAH1; E3 ubiquitin-protein ligase SIAH1 -3.295996 0.0021080
K04136 ADRA1B; adrenergic receptor alpha-1B -3.312017 0.0000004
K06625 CDKN1A, P21, CIP1; cyclin-dependent kinase inhibitor 1A -3.346373 0.0000829
K10504 ZBTB25; zinc finger and BTB domain-containing protein 25 -3.350434 0.0005058
K01228 MOGS; mannosyl-oligosaccharide glucosidase -3.364307 0.0009338
K15286 SLC35E4; solute carrier family 35, member E4 -3.391393 0.0000965
K17092 ANXA2; annexin A2 -3.396008 0.0008336
K08859 STK35, PDIK1, CLIK1; serine/threonine kinase 35 -3.402229 0.0003786
K11310 GTF3C4, KAT12; general transcription factor 3C polypeptide 4 -3.424249 0.0013772
K10269 FBXL3; F-box and leucine-rich repeat protein 3 -3.439181 0.0020834
K06262 GP1BB, CD42c; platelet glycoprotein Ib beta chain -3.447902 0.0003672
K10458 KLHL21; kelch-like protein 21 -3.501530 0.0003003
K00889 PIP5K; 1-phosphatidylinositol-4-phosphate 5-kinase -3.504482 0.0020400
K06278 CAV1; caveolin 1 -3.526911 0.0002417
K07861 RAC3; Ras-related C3 botulinum toxin substrate 3 -3.529358 0.0020969
K13213 MATR3; matrin 3 -3.539074 0.0056097
K20212 PROX2; prospero homeobox 2 -3.542098 0.0002030
K13113 UBL5, HUB1; ubiquitin-like protein 5 -3.551432 0.0016761
K02949 RP-S11e, RPS11; small subunit ribosomal protein S11e -3.580150 0.0017033
K22236 EXPH5; exophilin-5 -3.592531 0.0001144
K07941 ARF6; ADP-ribosylation factor 6 -3.594768 0.0009074
K15688 MUL1; E3 ubiquitin-protein ligase MUL1 -3.596998 0.0015710
K04662 BMP4; bone morphogenetic protein 4 -3.615678 0.0005611
K10536 aguA; agmatine deiminase -3.618663 0.0005001
K16198 YWHAG_H; 37694 protein gamma/eta -3.621081 0.0012849
K04300 SREB3, GPR173; super conserved receptor expressed in brain 3 -3.622873 0.0033145
K02528 ksgA; 16S rRNA (adenine1518-N6/adenine1519-N6)-dimethyltransferase -10.634240 0.0000001
K02959 RP-S16, MRPS16, rpsP; small subunit ribosomal protein S16 -10.635716 0.0000000
K02114 ATPF1E, atpC; F-type H+-transporting ATPase subunit epsilon -10.637286 0.0000000
K02902 RP-L28, MRPL28, rpmB; large subunit ribosomal protein L28 -10.649774 0.0000000
K03040 rpoA; DNA-directed RNA polymerase subunit alpha -10.658494 0.0000000
K03977 engA, der; GTPase -10.668886 0.0000000
K01887 RARS, argS; arginyl-tRNA synthetase -10.669018 0.0000000
K02337 dnaE; DNA polymerase III subunit alpha -10.669362 0.0000000
K01495 GCH1, folE; GTP cyclohydrolase IA -10.694049 0.0000003
K02967 RP-S2, MRPS2, rpsB; small subunit ribosomal protein S2 -10.712133 0.0000000
K03798 ftsH, hflB; cell division protease FtsH -10.736973 0.0000000
K02650 pilA; type IV pilus assembly protein PilA -10.744343 0.0000002
K07284 srtA; sortase A -10.747371 0.0000003
K12952 ctpE; cation-transporting P-type ATPase E -10.754468 0.0000000
K03687 GRPE; molecular chaperone GrpE -10.762873 0.0000000
K03470 rnhB; ribonuclease HII -10.766713 0.0000000
K21572 susD; starch-binding outer membrane protein, SusD/RagB family -10.770900 0.0000068
K02899 RP-L27, MRPL27, rpmA; large subunit ribosomal protein L27 -10.789977 0.0000000
K02016 ABC.FEV.S; iron complex transport system substrate-binding protein -10.802965 0.0000001
K02945 RP-S1, rpsA; small subunit ribosomal protein S1 -10.813169 0.0000000
K02015 ABC.FEV.P; iron complex transport system permease protein -10.831730 0.0000001
K01876 aspS; aspartyl-tRNA synthetase -10.861371 0.0000000
K03980 murJ, mviN; putative peptidoglycan lipid II flippase -10.864994 0.0000000
K01662 dxs; 1-deoxy-D-xylulose-5-phosphate synthase -10.870003 0.0000000
K00604 MTFMT, fmt; methionyl-tRNA formyltransferase -10.873510 0.0000000
K03601 xseA; exodeoxyribonuclease VII large subunit -10.874460 0.0000000
K02950 RP-S12, MRPS12, rpsL; small subunit ribosomal protein S12 -10.890262 0.0000000
K03657 uvrD, pcrA; DNA helicase II / ATP-dependent DNA helicase PcrA -10.911499 0.0000000
K07652 vicK; two-component system, OmpR family, sensor histidine kinase VicK -10.929455 0.0000001
K07056 rsmI; 16S rRNA (cytidine1402-2’-O)-methyltransferase -11.009937 0.0000000
K02108 ATPF0A, atpB; F-type H+-transporting ATPase subunit a -11.016583 0.0000000
K02004 ABC.CD.P; putative ABC transport system permease protein -11.036046 0.0000000
K01929 murF; UDP-N-acetylmuramoyl-tripeptide–D-alanyl-D-alanine ligase -11.069960 0.0000002
K06147 ABCB-BAC; ATP-binding cassette, subfamily B, bacterial -11.093019 0.0000002
K02909 RP-L31, rpmE; large subunit ribosomal protein L31 -11.093321 0.0000000
K02879 RP-L17, MRPL17, rplQ; large subunit ribosomal protein L17 -11.096519 0.0077198
K02948 RP-S11, MRPS11, rpsK; small subunit ribosomal protein S11 -11.136386 0.0000000
K03711 fur, zur, furB; Fur family transcriptional regulator, ferric uptake regulator -11.137503 0.0000000
K02884 RP-L19, MRPL19, rplS; large subunit ribosomal protein L19 -11.145289 0.0000000
K02520 infC, MTIF3; translation initiation factor IF-3 -11.157493 0.0000000
K02913 RP-L33, MRPL33, rpmG; large subunit ribosomal protein L33 -11.209100 0.0000000
K03629 recF; DNA replication and repair protein RecF -11.282828 0.0000000
K03284 corA; magnesium transporter -11.300507 0.0000002
K01928 murE; UDP-N-acetylmuramoyl-L-alanyl-D-glutamate–2,6-diaminopimelate ligase -11.356828 0.0000000
K02518 infA; translation initiation factor IF-1 -11.421470 0.0000000
K02963 RP-S18, MRPS18, rpsR; small subunit ribosomal protein S18 -11.429634 0.0000001
K02035 ABC.PE.S; peptide/nickel transport system substrate-binding protein -11.584982 0.0000000
K02428 rdgB; XTP/dITP diphosphohydrolase -11.977146 0.0000000
K01992 ABC-2.P; ABC-2 type transport system permease protein -12.183237 0.0000000
UNGROUPED -101.487653 0.0001436

Blood vs buccal

Negative fold changes are higher in buccal samples while positive are higher in blood samples.

bb3 = kos.loc[:, ['Buccal_Blood_FC', 'Buccal_Blood_p']]
bb3 = bb3[bb3['Buccal_Blood_p'] < 0.01]
bb3 = bb3.sort_values(by=['Buccal_Blood_FC'], ascending=False).rename(columns={'Buccal_Blood_FC':'Log2 fold change', 'Buccal_Blood_p':'p'})
bb3 = pd.concat([bb3.head(n=50), bb3.tail(n=50)])
bb3 = bb3.groupby(by=bb3.index, axis=0).mean()
bb3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bb3 %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
K05692 ACTB_G1; actin beta/gamma 1 1.4993031 0.0003430
K02262 COX3; cytochrome c oxidase subunit 3 -0.6901933 0.0065397
K03879 ND2; NADH-ubiquinone oxidoreductase chain 2 -0.7995222 0.0071439
K03880 ND3; NADH-ubiquinone oxidoreductase chain 3 -0.9286128 0.0067070
K02256 COX1; cytochrome c oxidase subunit 1 -1.0054815 0.0015965
K03881 ND4; NADH-ubiquinone oxidoreductase chain 4 -1.3406126 0.0000704
K03883 ND5; NADH-ubiquinone oxidoreductase chain 5 -1.7012226 0.0000637
K03884 ND6; NADH-ubiquinone oxidoreductase chain 6 -1.7732693 0.0000640
K02261 COX2; cytochrome c oxidase subunit 2 -1.8317668 0.0000277
K03882 ND4L; NADH-ubiquinone oxidoreductase chain 4L -1.9266330 0.0002886
K01197 hya; hyaluronoglucosaminidase -5.5547745 0.0000297
K04186 ACKR4, CCRL1, CCR11; atypical chemokine receptor 4 -5.5603638 0.0000562
K08051 PLEKHA8, FAPP2; pleckstrin homology domain containing family A member 8 -5.7567048 0.0000020
K02975 RP-S25e, RPS25; small subunit ribosomal protein S25e -6.0071094 0.0000464
K00716 FUT3; galactoside 3(4)-fucosyltransferase 3 -6.0412983 0.0000008
K03605 hyaD, hybD; hydrogenase maturation protease -6.0741342 0.0001277
K22063 ISCA1; iron-sulfur cluster assembly 1 -6.5689145 0.0000128
K04181 CCR6, CD196; C-C chemokine receptor type 6 -6.5690809 0.0000001
K00161 PDHA, pdhA; pyruvate dehydrogenase E1 component alpha subunit -6.5963219 0.0000039
K00974 cca; tRNA nucleotidyltransferase (CCA-adding enzyme) -6.6135533 0.0000080
K12267 msrAB; peptide methionine sulfoxide reductase msrA/msrB -6.6908854 0.0000086
K05592 deaD, cshA; ATP-dependent RNA helicase DeaD -6.7206069 0.0001525
K03402 argR, ahrC; transcriptional regulator of arginine metabolism -6.7282698 0.0000033
K19468 KCNJ18; potassium inwardly-rectifying channel subfamily J member 18 -6.7950723 0.0000096
K05787 hupA; DNA-binding protein HU-alpha -6.8009142 0.0000039
K09067 ASCL; achaete-scute complex protein -6.8498691 0.0000022
K02271 COX7B; cytochrome c oxidase subunit 7b -6.8592266 0.0000063
K02873 RP-L13e, RPL13; large subunit ribosomal protein L13e -6.9679175 0.0000089
K03571 mreD; rod shape-determining protein MreD -6.9707589 0.0000810
K03062 PSMC1, RPT2; 26S proteasome regulatory subunit T2 -6.9765306 0.0000004
K16362 FLRT; leucine-rich repeat transmembrane protein FLRT -6.9876251 0.0000000
K15705 RNF152; E3 ubiquitin-protein ligase RNF152 -6.9964333 0.0000004
K04206 NPY4R; neuropeptide Y receptor type 4 -7.0025676 0.0000001
K09014 sufB; Fe-S cluster assembly protein SufB -7.0457215 0.0000014
K03531 ftsZ; cell division protein FtsZ -7.0472135 0.0000029
K00549 metE; 5-methyltetrahydropteroyltriglutamate–homocysteine methyltransferase -7.0575570 0.0001285
K02012 afuA, fbpA; iron(III) transport system substrate-binding protein -7.0665801 0.0000956
K04047 dps; starvation-inducible DNA-binding protein -7.0994269 0.0000053
K00865 glxK, garK; glycerate 2-kinase -7.0996546 0.0000000
K15583 oppD; oligopeptide transport system ATP-binding protein -7.1030263 0.0000901
K00759 APRT, apt; adenine phosphoribosyltransferase -7.1375675 0.0000001
K15051 endA; DNA-entry nuclease -7.1395383 0.0000011
K02882 RP-L18Ae, RPL18A; large subunit ribosomal protein L18Ae -7.1535295 0.0000398
K09817 znuC; zinc transport system ATP-binding protein -7.1585516 0.0000625
K01744 aspA; aspartate ammonia-lyase -7.1621156 0.0000000
K00362 nirB; nitrite reductase (NADH) large subunit -7.1815300 0.0000013
K09501 SERPINH1, HSP47; serpin peptidase inhibitor, clade H, member 1 -7.2541492 0.0000014
K01870 IARS, ileS; isoleucyl-tRNA synthetase -7.3234073 0.0000020
K01462 PDF, def; peptide deformylase -7.3667162 0.0000013
K03499 trkA, ktrA; trk system potassium uptake protein -7.4149298 0.0000113
K03687 GRPE; molecular chaperone GrpE -8.5932077 0.0000636
K00728 POMT, pmt; dolichyl-phosphate-mannose-protein mannosyltransferase -8.6341350 0.0000113
K12019 TRIM43S; tripartite motif-containing protein 43/48/49/64/77 -8.6406359 0.0000002
K01897 ACSL, fadD; long-chain acyl-CoA synthetase -8.6451236 0.0000000
K01673 cynT, can; carbonic anhydrase -8.6489442 0.0000010
K06133 LYS5, acpT; 4’-phosphopantetheinyl transferase -8.6506859 0.0000086
K01256 pepN; aminopeptidase N -8.6534216 0.0000075
K03801 lipB; lipoyl(octanoyl) transferase -8.6579620 0.0000085
K08680 menH; 2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate synthase -8.7457409 0.0000029
K03601 xseA; exodeoxyribonuclease VII large subunit -8.7460410 0.0000178
K02909 RP-L31, rpmE; large subunit ribosomal protein L31 -8.7782578 0.0000050
K03629 recF; DNA replication and repair protein RecF -8.7908081 0.0000020
K02358 tuf, TUFM; elongation factor Tu -8.7911934 0.0001599
K00919 ispE; 4-diphosphocytidyl-2-C-methyl-D-erythritol kinase -8.8275283 0.0000001
K03308 TC.NSS; neurotransmitter:Na+ symporter, NSS family -8.8362899 0.0000020
K02777 PTS-Glc-EIIA, crr; PTS system, sugar-specific IIA component -8.8622937 0.0000004
K00240 sdhB, frdB; succinate dehydrogenase / fumarate reductase, iron-sulfur subunit -8.8736866 0.0000005
K01752 E4.3.1.17, sdaA, sdaB, tdcG; L-serine dehydratase -8.8849557 0.0000001
K07442 TRM61, GCD14; tRNA (adenine57-N1/adenine58-N1)-methyltransferase catalytic subunit -8.9166101 0.0000001
K12952 ctpE; cation-transporting P-type ATPase E -8.9502777 0.0000001
K01887 RARS, argS; arginyl-tRNA synthetase -8.9638112 0.0000031
K03561 exbB; biopolymer transport protein ExbB -8.9871294 0.0000039
K02913 RP-L33, MRPL33, rpmG; large subunit ribosomal protein L33 -9.0621394 0.0000014
K00948 PRPS, prsA; ribose-phosphate pyrophosphokinase -9.1014159 0.0000000
K02016 ABC.FEV.S; iron complex transport system substrate-binding protein -9.1059215 0.0000007
K04096 smf; DNA processing protein -9.1325659 0.0000008
K01495 GCH1, folE; GTP cyclohydrolase IA -9.1419679 0.0000000
K01790 rfbC, rmlC; dTDP-4-dehydrorhamnose 3,5-epimerase -9.1423791 0.0000003
K00067 rfbD, rmlD; dTDP-4-dehydrorhamnose reductase -9.1423791 0.0000003
K01923 purC; phosphoribosylaminoimidazole-succinocarboxamide synthase -9.1888857 0.0000031
K08998 K08998; uncharacterized protein -9.1971222 0.0000040
K00036 G6PD, zwf; glucose-6-phosphate 1-dehydrogenase -9.2062001 0.0000012
K02470 gyrB; DNA gyrase subunit B -9.2629947 0.0000002
K02335 polA; DNA polymerase I -9.2744374 0.0000085
K03310 TC.AGCS; alanine or glycine:cation symporter, AGCS family -9.2785495 0.0000040
K03536 rnpA; ribonuclease P protein component -9.2975345 0.0000043
K01662 dxs; 1-deoxy-D-xylulose-5-phosphate synthase -9.3335747 0.0000043
K02970 RP-S21, MRPS21, rpsU; small subunit ribosomal protein S21 -9.3704015 0.0000001
K03559 exbD; biopolymer transport protein ExbD -9.4022669 0.0000000
K00059 fabG, OAR1; 3-oxoacyl- -9.4756747 0.0000004
K01754 E4.3.1.19, ilvA, tdcB; threonine dehydratase -9.5346320 0.0000000
K00088 IMPDH, guaB; IMP dehydrogenase -9.6008352 0.0000000
K14347 SLC10A7, P7; solute carrier family 10 (sodium/bile acid cotransporter), member 7 -9.6046050 0.0000106
K03657 uvrD, pcrA; DNA helicase II / ATP-dependent DNA helicase PcrA -9.6845660 0.0000000
K02518 infA; translation initiation factor IF-1 -9.7149416 0.0000003
K05985 rnmV; ribonuclease M5 -9.7184461 0.0000195
K02015 ABC.FEV.P; iron complex transport system permease protein -9.9488969 0.0000096
K01992 ABC-2.P; ABC-2 type transport system permease protein -10.2947787 0.0000014
K06199 crcB, FEX; fluoride exporter -10.3477796 0.0001305
UNGROUPED -279.5860424 0.0009433

Saliva vs buccal

Negative fold changes are higher in saliva samples while positive are higher in buccal samples.

sb3 = kos.loc[:, ['Saliva_Buccal_FC', 'Saliva_Buccal_p']]
sb3 = sb3[sb3['Saliva_Buccal_p'] < 0.01]
sb3 = sb3.sort_values(by=['Saliva_Buccal_FC'], ascending=False).rename(columns={'Saliva_Buccal_FC':'Log2 fold change', 'Saliva_Buccal_p':'p'})
sb3 = pd.concat([sb3.head(n=50), sb3.tail(n=50)])
sb3 = sb3.groupby(by=sb3.index, axis=0).mean()
sb3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$sb3 %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
K22463 afsA; 2-oxo-3-(phosphooxy)propyl 3-oxoalkanoate synthase 2.850946 0.0058151
K00865 glxK, garK; glycerate 2-kinase 2.820485 0.0067396
K10752 RBBP4, HAT2, CAF1, MIS16; histone-binding protein RBBP4 2.399969 0.0051015
K02261 COX2; cytochrome c oxidase subunit 2 2.202837 0.0000476
K10354 ACTA1; actin, alpha skeletal muscle 2.193567 0.0065157
K03879 ND2; NADH-ubiquinone oxidoreductase chain 2 2.133737 0.0000685
K03881 ND4; NADH-ubiquinone oxidoreductase chain 4 2.127053 0.0000110
K02125 ATPeF08, MTATP8, ATP8; F-type H+-transporting ATPase subunit 8 2.106681 0.0000919
K03884 ND6; NADH-ubiquinone oxidoreductase chain 6 2.071658 0.0000274
K03883 ND5; NADH-ubiquinone oxidoreductase chain 5 2.066092 0.0000330
K03878 ND1; NADH-ubiquinone oxidoreductase chain 1 2.047173 0.0000078
K03880 ND3; NADH-ubiquinone oxidoreductase chain 3 2.025634 0.0000472
K03882 ND4L; NADH-ubiquinone oxidoreductase chain 4L 2.005996 0.0001796
K01673 cynT, can; carbonic anhydrase 1.985742 0.0096769
K02256 COX1; cytochrome c oxidase subunit 1 1.953674 0.0003900
K02951 RP-S12e, RPS12; small subunit ribosomal protein S12e 1.879717 0.0034727
K02262 COX3; cytochrome c oxidase subunit 3 1.836880 0.0002819
K02865 RP-L10Ae, RPL10A; large subunit ribosomal protein L10Ae 1.824018 0.0057603
K02880 RP-L17e, RPL17; large subunit ribosomal protein L17e 1.810086 0.0046569
K02987 RP-S4e, RPS4; small subunit ribosomal protein S4e 1.720987 0.0023484
K00716 FUT3; galactoside 3(4)-fucosyltransferase 3 1.478105 0.0092689
K07604 KRT1; type I keratin, acidic 1.279948 0.0058158
K02917 RP-L35Ae, RPL35A; large subunit ribosomal protein L35Ae 1.029740 0.0089774
K02887 RP-L20, MRPL20, rplT; large subunit ribosomal protein L20 -1.138422 0.0067601
K03657 uvrD, pcrA; DNA helicase II / ATP-dependent DNA helicase PcrA -1.226933 0.0011742
K04077 groEL, HSPD1; chaperonin GroEL -1.315605 0.0012724
K02835 prfA, MTRF1, MRF1; peptide chain release factor 1 -1.550880 0.0007800
K03060 rpoZ; DNA-directed RNA polymerase subunit omega -1.588916 0.0043101
K01887 RARS, argS; arginyl-tRNA synthetase -1.705206 0.0063117
K02518 infA; translation initiation factor IF-1 -1.706528 0.0030988
K12952 ctpE; cation-transporting P-type ATPase E -1.804191 0.0067174
K03551 ruvB; holliday junction DNA helicase RuvB -1.814960 0.0059714
K03595 era, ERAL1; GTPase -1.832702 0.0073609
K01992 ABC-2.P; ABC-2 type transport system permease protein -1.888459 0.0072512
K01897 ACSL, fadD; long-chain acyl-CoA synthetase -1.893089 0.0084500
K02238 comEC; competence protein ComEC -2.087058 0.0011352
K02913 RP-L33, MRPL33, rpmG; large subunit ribosomal protein L33 -2.146961 0.0077292
K00943 tmk, DTYMK; dTMP kinase -2.194727 0.0016675
K02959 RP-S16, MRPS16, rpsP; small subunit ribosomal protein S16 -2.286885 0.0054462
K00872 thrB1; homoserine kinase -2.304481 0.0004883
K02909 RP-L31, rpmE; large subunit ribosomal protein L31 -2.315063 0.0041248
K02003 ABC.CD.A; putative ABC transport system ATP-binding protein -2.437543 0.0027193
K03629 recF; DNA replication and repair protein RecF -2.492020 0.0040615
K09206 KLF5; krueppel-like factor 5 -2.551036 0.0013898
K02892 RP-L23, MRPL23, rplW; large subunit ribosomal protein L23 -2.595122 0.0032808
K03097 CSNK2A; casein kinase II subunit alpha -2.640297 0.0002630
K02528 ksgA; 16S rRNA (adenine1518-N6/adenine1519-N6)-dimethyltransferase -2.657221 0.0007924
K01870 IARS, ileS; isoleucyl-tRNA synthetase -2.676801 0.0018858
K01834 PGAM, gpmA; 2,3-bisphosphoglycerate-dependent phosphoglycerate mutase -2.779467 0.0013469
K02879 RP-L17, MRPL17, rplQ; large subunit ribosomal protein L17 -2.856197 0.0092922
K00226 pyrD; dihydroorotate dehydrogenase (fumarate) -9.729337 0.0000005
K02340 holA; DNA polymerase III subunit delta -9.769813 0.0074585
K19302 bcrC; undecaprenyl-diphosphatase -9.788827 0.0042853
K01714 dapA; 4-hydroxy-tetrahydrodipicolinate synthase -9.824873 0.0068356
K04068 nrdG; anaerobic ribonucleoside-triphosphate reductase activating protein -9.826740 0.0000007
K01002 mdoB; phosphoglycerol transferase -9.836073 0.0000006
K05873 cyaB; adenylate cyclase, class 2 -9.874195 0.0000002
K02890 RP-L22, MRPL22, rplV; large subunit ribosomal protein L22 -9.895246 0.0025241
K19710 E2.7.7.53; ATP adenylyltransferase -9.896449 0.0000006
K01579 panD; aspartate 1-decarboxylase -9.915735 0.0000001
K06885 K06885; uncharacterized protein -9.923682 0.0037099
K06180 rluD; 23S rRNA pseudouridine1911/1915/1917 synthase -9.960684 0.0022266
K01356 lexA; repressor LexA -10.000666 0.0000007
K02662 pilM; type IV pilus assembly protein PilM -10.002825 0.0000004
K09775 K09775; uncharacterized protein -10.039478 0.0000006
K07042 ybeY, yqfG; probable rRNA maturation factor -10.045636 0.0020905
K07009 gatD; lipid II isoglutaminyl synthase (glutamine-hydrolysing) -10.067533 0.0000005
K07082 K07082; UPF0755 protein -10.074038 0.0043652
K03046 rpoC; DNA-directed RNA polymerase subunit beta’ -10.082929 0.0035729
K07462 recJ; single-stranded-DNA-specific exonuclease -10.084816 0.0000007
K10563 mutM, fpg; formamidopyrimidine-DNA glycosylase -10.116843 0.0000001
K13993 HSP20; HSP20 family protein -10.125501 0.0000007
K02653 pilC; type IV pilus assembly protein PilC -10.140647 0.0000005
K00615 E2.2.1.1, tktA, tktB; transketolase -10.185928 0.0025127
K07447 ruvX; putative holliday junction resolvase -10.238002 0.0000000
K07005 K07005; uncharacterized protein -10.252818 0.0000006
K09117 K09117; uncharacterized protein -10.253138 0.0000007
K16302 CNNM; metal transporter CNNM -10.277650 0.0000004
K02992 RP-S7, MRPS7, rpsG; small subunit ribosomal protein S7 -10.306224 0.0038282
K01358 clpP, CLPP; ATP-dependent Clp protease, protease subunit -10.329657 0.0036579
K07027 K07027; glycosyltransferase 2 family protein -10.331001 0.0000003
K09799 K09799; uncharacterized protein -10.356046 0.0000005
K03685 rnc, DROSHA, RNT1; ribonuclease III -10.375157 0.0015344
K03703 uvrC; excinuclease ABC subunit C -10.396153 0.0068530
K01873 VARS, valS; valyl-tRNA synthetase -10.424692 0.0014117
K03070 secA; preprotein translocase subunit SecA -10.468957 0.0037976
K04075 tilS, mesJ; tRNA(Ile)-lysidine synthase -10.473047 0.0000000
K03625 nusB; transcription antitermination protein NusB -10.502848 0.0030047
K00942 E2.7.4.8, gmk; guanylate kinase -10.529623 0.0024473
K02114 ATPF1E, atpC; F-type H+-transporting ATPase subunit epsilon -10.637286 0.0000000
K02902 RP-L28, MRPL28, rpmB; large subunit ribosomal protein L28 -10.649774 0.0077066
K02967 RP-S2, MRPS2, rpsB; small subunit ribosomal protein S2 -10.712133 0.0009110
K02650 pilA; type IV pilus assembly protein PilA -10.744343 0.0000002
K07284 srtA; sortase A -10.747371 0.0000003
K03470 rnhB; ribonuclease HII -10.766713 0.0004899
K21572 susD; starch-binding outer membrane protein, SusD/RagB family -10.770900 0.0000068
K07652 vicK; two-component system, OmpR family, sensor histidine kinase VicK -10.929455 0.0008861
K06147 ABCB-BAC; ATP-binding cassette, subfamily B, bacterial -11.093019 0.0000002
K03284 corA; magnesium transporter -11.300507 0.0000002
UNGROUPED -159.319416 0.0033425

Saliva vs saliva_euk

Negative fold changes are higher in saliva samples while positive are higher in saliva_euk samples.

ss3 = kos.loc[:, ['Saliva_Saliva_euk_FC', 'Saliva_Saliva_euk_p']]
ss3 = ss3[ss3['Saliva_Saliva_euk_p'] < 0.01]
ss3 = ss3.sort_values(by=['Saliva_Saliva_euk_FC'], ascending=False).rename(columns={'Saliva_Saliva_euk_FC':'Log2 fold change', 'Saliva_Saliva_euk_p':'p'})
ss3 = pd.concat([ss3.head(n=50), ss3.tail(n=50)])
ss3 = ss3.groupby(by=ss3.index, axis=0).mean()
ss3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$ss3 %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
K11254 H4; histone H4 2.0312789 0.0037851
K07374 TUBA; tubulin alpha 1.6788133 0.0060293
K09560 ST13; suppressor of tumorigenicity protein 13 1.6686333 0.0085426
K07604 KRT1; type I keratin, acidic 1.6012059 0.0026179
K11253 H3; histone H3 1.5814691 0.0034163
K02870 RP-L12e, RPL12; large subunit ribosomal protein L12e 1.1022718 0.0042198
K03664 smpB; SsrA-binding protein -0.9274021 0.0091598
K02909 RP-L31, rpmE; large subunit ribosomal protein L31 -1.0914627 0.0000497
K02891 RP-L22e, RPL22; large subunit ribosomal protein L22e -1.0921348 0.0075675
K03884 ND6; NADH-ubiquinone oxidoreductase chain 6 -1.4474400 0.0043794
K03111 ssb; single-strand DNA-binding protein -1.5397928 0.0075431
K09206 KLF5; krueppel-like factor 5 -2.5510365 0.0013898
K08428 GPR139; G protein-coupled receptor 139 -3.1270607 0.0008065
K08859 STK35, PDIK1, CLIK1; serine/threonine kinase 35 -3.4022291 0.0003786
K10269 FBXL3; F-box and leucine-rich repeat protein 3 -3.4391808 0.0020834
K10536 aguA; agmatine deiminase -3.6186631 0.0005001
K04300 SREB3, GPR173; super conserved receptor expressed in brain 3 -3.6228735 0.0033145
K05223 NMB; neuromedin B -3.7296179 0.0057104
K01669 phrB; deoxyribodipyrimidine photo-lyase -3.7660531 0.0015032
K02258 COX11, ctaG; cytochrome c oxidase assembly protein subunit 11 -3.8012608 0.0022219
K02222 SFRP5; secreted frizzled-related protein 5 -3.8855100 0.0011182
K19910 SYT10; synaptotagmin-10 -3.9182049 0.0007119
K04939 KCNMB3; potassium large conductance calcium-activated channel subfamily M beta member 3 -3.9417163 0.0001364
K04969 TRPC6; transient receptor potential cation channel subfamily C member 6 -3.9536756 0.0003509
K17822 DCUN1D1_2; DCN1-like protein 43862 -4.0010991 0.0000262
K19876 STEAP4; metalloreductase STEAP4 -4.0023695 0.0000728
K13155 SNRNP35; U11/U12 small nuclear ribonucleoprotein 35 kDa protein -4.1129610 0.0003233
K22073 IBA57; transferase CAF17, mitochondrial -4.1232314 0.0000738
K16146 pep2; maltokinase -4.1639256 0.0081216
K10861 UDG2, CCNO; uracil-DNA glycosylase 2 -4.1809700 0.0003352
K16586 HAUS3; HAUS augmin-like complex subunit 3 -4.1937376 0.0000252
K16069 ADAM29; disintegrin and metalloproteinase domain-containing protein 29 -4.2406876 0.0000199
K10040 peb1B, glnP, glnM; aspartate/glutamate/glutamine transport system permease protein -4.2514462 0.0059826
K00865 glxK, garK; glycerate 2-kinase -4.2791696 0.0014200
K10476 KBTBD11; kelch repeat and BTB domain-containing protein 11 -4.2847677 0.0006106
K02036 pstB; phosphate transport system ATP-binding protein -4.3169584 0.0007591
K05307 THTPA; thiamine-triphosphatase -4.3342036 0.0003188
K09039 NFE2; nuclear factor erythroid 2 -4.3363638 0.0000797
K04667 INHBA; inhibin beta A chain -4.4242494 0.0000395
K15102 SLC25A3, PHC, PIC; solute carrier family 25 (mitochondrial phosphate transporter), member 3 -4.4830752 0.0017722
K10712 ADO; cysteamine dioxygenase -4.5607344 0.0000705
K02160 accB, bccP; acetyl-CoA carboxylase biotin carboxyl carrier protein -4.5697394 0.0003862
K03892 arsR; ArsR family transcriptional regulator, arsenate/arsenite/antimonite-responsive transcriptional repressor -4.5804169 0.0001169
K01227 E3.2.1.96; mannosyl-glycoprotein endo-beta-N-acetylglucosaminidase -4.6288951 0.0027745
K09997 artI; arginine transport system substrate-binding protein -4.6926176 0.0030980
K08206 SLC22A9S; MFS transporter, OCT family, solute carrier family 22 (organic anion/cation transporter), member 9/10/19/24/25 -4.7049019 0.0000053
K04517 tyrA2; prephenate dehydrogenase -4.7425379 0.0004995
K17344 ROM1, TSPAN23; rod outer segment membrane protein 1 -4.7861964 0.0002944
K00419 QCR9, UCRC; ubiquinol-cytochrome c reductase subunit 9 -4.7999481 0.0075587
K02153 ATPeV0E, ATP6H; V-type H+-transporting ATPase subunit e -4.8363638 0.0006969
K02199 ccmG, dsbE; cytochrome c biogenesis protein CcmG, thiol:disulfide interchange protein DsbE -5.1522058 0.0000403
K09808 lolC_E; lipoprotein-releasing system permease protein -5.1884507 0.0001386
K22463 afsA; 2-oxo-3-(phosphooxy)propyl 3-oxoalkanoate synthase -5.1949011 0.0000211
K03592 pmbA; PmbA protein -5.2291259 0.0000043
K02445 glpT; MFS transporter, OPA family, glycerol-3-phosphate transporter -5.2370393 0.0046875
K00627 DLAT, aceF, pdhC; pyruvate dehydrogenase E2 component (dihydrolipoamide acetyltransferase) -5.2442432 0.0000001
K14443 TOB; protein Tob/BTG -5.2481079 0.0070673
K16859 PGF; placenta growth factor -5.2517623 0.0000307
K00417 QCR7, UQCRB; ubiquinol-cytochrome c reductase subunit 7 -5.3392869 0.0011680
K19804 lapB; lipopolysaccharide assembly protein B -5.4020946 0.0000521
K01193 INV, sacA; beta-fructofuranosidase -5.4035020 0.0000000
K02549 menC; O-succinylbenzoate synthase -5.4195231 0.0000451
K07407 E3.2.1.22B, galA, rafA; alpha-galactosidase -5.4935953 0.0000159
K13671 K13671; alpha-1,2-mannosyltransferase -5.5403114 0.0000658
K21828 arcR; CRP/FNR family transcriptional regulator, arginine deiminase pathway regulator -5.7368659 0.0017124
K07386 pepO; putative endopeptidase -5.7453284 0.0002614
K03784 deoD; purine-nucleoside phosphorylase -5.7791686 0.0000081
K13628 iscA; iron-sulfur cluster assembly protein -5.7889425 0.0095671
K10541 mglC; methyl-galactoside transport system permease protein -5.9340829 0.0000001
K03695 clpB; ATP-dependent Clp protease ATP-binding subunit ClpB -5.9491229 0.0000008
K02501 hisH; imidazole glycerol-phosphate synthase subunit HisH -6.1148747 0.0000009
K20533 trbI; type IV secretion system protein TrbI -6.1158474 0.0000011
K03074 secF; preprotein translocase subunit SecF -6.1206397 0.0000000
K03290 nanT; MFS transporter, SHS family, sialic acid transporter -6.1237791 0.0005453
K00368 nirK; nitrite reductase (NO-forming) -6.1604933 0.0000245
K07220 K07220; uncharacterized protein -6.2003992 0.0003796
K22319 oleC; olefin beta-lactone synthetase -6.3621904 0.0000700
K19180 tll; dTDP-6-deoxy-L-talose 4-dehydrogenase (NAD+) -6.3671514 0.0098966
K03179 ubiA; 4-hydroxybenzoate polyprenyltransferase -6.4147945 0.0000048
K02770 PTS-Fru-EIIC, fruA; PTS system, fructose-specific IIC component -6.4527764 0.0000008
K02769 PTS-Fru-EIIB, fruA; PTS system, fructose-specific IIB component -6.4527764 0.0000008
K04083 hslO; molecular chaperone Hsp33 -6.4745551 0.0000024
K00609 pyrB, PYR2; aspartate carbamoyltransferase catalytic subunit -6.4853801 0.0033392
K03747 smg; Smg protein -6.6444347 0.0000040
K01673 cynT, can; carbonic anhydrase -6.6632026 0.0000000
K04754 mlaA, vacJ; phospholipid-binding lipoprotein MlaA -6.7234948 0.0000001
K07670 mtrA; two-component system, OmpR family, response regulator MtrA -6.7331937 0.0000000
K11189 PTS-HPR; phosphocarrier protein -6.8845382 0.0045279
K01129 dgt; dGTPase -6.9440136 0.0000490
K02440 GLPF; glycerol uptake facilitator protein -6.9930044 0.0000030
K03072 secD; preprotein translocase subunit SecD -7.1830725 0.0000054
K02033 ABC.PE.P; peptide/nickel transport system permease protein -7.2629446 0.0095901
K05522 nei; endonuclease VIII -7.2726538 0.0000010
K02111 ATPF1A, atpA; F-type H+/Na+-transporting ATPase subunit alpha -7.4788154 0.0091373
K11906 vasD, lip; type VI secretion system protein VasD -7.5285192 0.0000000
K06320 cgeB; spore maturation protein CgeB -7.7614668 0.0000014
K21237 gp2; Phi29virus DNA polymerase -7.8498310 0.0000867
K04488 iscU, nifU; nitrogen fixation protein NifU and related proteins -7.9371426 0.0000000
K21449 ata, sadA, emaA; trimeric autotransporter adhesin -8.0468978 0.0000042
K07461 K07461; putative endonuclease -8.5351052 0.0000009

Buccal vs buccal_euk

Negative fold changes are higher in buccal samples while positive are higher in buccal_euk samples.

bbe3 = kos.loc[:, ['Buccal_Buccal_euk_FC', 'Buccal_Buccal_euk_p']]
bbe3 = bbe3[bbe3['Buccal_Buccal_euk_p'] < 0.01]
bbe3 = bbe3.sort_values(by=['Buccal_Buccal_euk_FC'], ascending=False).rename(columns={'Buccal_Buccal_euk_FC':'Log2 fold change', 'Buccal_Buccal_euk_p':'p'})
bbe3 = pd.concat([bbe3.head(n=50), bbe3.tail(n=50)])
bbe3 = bbe3.groupby(by=bbe3.index, axis=0).mean()
bbe3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$bbe3 %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
K02256 COX1; cytochrome c oxidase subunit 1 -1.679128 0.0029860
K02262 COX3; cytochrome c oxidase subunit 3 -1.713256 0.0027708
K03878 ND1; NADH-ubiquinone oxidoreductase chain 1 -1.775227 0.0030581
K02261 COX2; cytochrome c oxidase subunit 2 -2.557425 0.0005212
K03884 ND6; NADH-ubiquinone oxidoreductase chain 6 -2.604483 0.0067667
K03880 ND3; NADH-ubiquinone oxidoreductase chain 3 -2.679944 0.0039535
K03881 ND4; NADH-ubiquinone oxidoreductase chain 4 -2.720035 0.0002712
K03883 ND5; NADH-ubiquinone oxidoreductase chain 5 -3.128334 0.0002943
K03882 ND4L; NADH-ubiquinone oxidoreductase chain 4L -3.710827 0.0004718
K01197 hya; hyaluronoglucosaminidase -5.554774 0.0000297
K08051 PLEKHA8, FAPP2; pleckstrin homology domain containing family A member 8 -5.756705 0.0000020
K00716 FUT3; galactoside 3(4)-fucosyltransferase 3 -6.041298 0.0000008
K04181 CCR6, CD196; C-C chemokine receptor type 6 -6.569081 0.0000001
K00161 PDHA, pdhA; pyruvate dehydrogenase E1 component alpha subunit -6.596322 0.0000039
K00974 cca; tRNA nucleotidyltransferase (CCA-adding enzyme) -6.613553 0.0000080
K03402 argR, ahrC; transcriptional regulator of arginine metabolism -6.728270 0.0000033
K05787 hupA; DNA-binding protein HU-alpha -6.800914 0.0000039
K16362 FLRT; leucine-rich repeat transmembrane protein FLRT -6.987625 0.0000000
K03531 ftsZ; cell division protein FtsZ -7.047213 0.0000029
K00549 metE; 5-methyltetrahydropteroyltriglutamate–homocysteine methyltransferase -7.057557 0.0001285
K02012 afuA, fbpA; iron(III) transport system substrate-binding protein -7.066580 0.0000956
K04047 dps; starvation-inducible DNA-binding protein -7.099427 0.0000053
K15051 endA; DNA-entry nuclease -7.139538 0.0000011
K09501 SERPINH1, HSP47; serpin peptidase inhibitor, clade H, member 1 -7.254149 0.0000014
K01870 IARS, ileS; isoleucyl-tRNA synthetase -7.323407 0.0000020
K04763 xerD; integrase/recombinase XerD -7.415967 0.0000014
K02892 RP-L23, MRPL23, rplW; large subunit ribosomal protein L23 -7.435998 0.0000041
K01817 trpF; phosphoribosylanthranilate isomerase -7.491139 0.0000306
K01520 dut, DUT; dUTP pyrophosphatase -7.544095 0.0000759
K03327 TC.MATE, SLC47A, norM, mdtK, dinF; multidrug resistance protein, MATE family -7.566111 0.0000001
K09861 K09861; uncharacterized protein -7.580484 0.0000002
K00981 E2.7.7.41, CDS1, CDS2, cdsA; phosphatidate cytidylyltransferase -7.645951 0.0002095
K03271 gmhA, lpcA; D-sedoheptulose 7-phosphate isomerase -7.657154 0.0057884
K02238 comEC; competence protein ComEC -7.729226 0.0000000
K13571 pafA; proteasome accessory factor A -7.746068 0.0000001
K01821 praC, xylH; 4-oxalocrotonate tautomerase -7.799769 0.0000008
K00886 ppgK; polyphosphate glucokinase -7.804302 0.0000006
K01243 mtnN, mtn, pfs; adenosylhomocysteine nucleosidase -7.825905 0.0000006
K02835 prfA, MTRF1, MRF1; peptide chain release factor 1 -7.846768 0.0000000
K07461 K07461; putative endonuclease -7.915114 0.0000077
K00241 sdhC, frdC; succinate dehydrogenase / fumarate reductase, cytochrome b subunit -7.920181 0.0000048
K07768 senX3; two-component system, OmpR family, sensor histidine kinase SenX3 -7.930145 0.0000068
K00656 E2.3.1.54, pflD; formate C-acetyltransferase -7.940533 0.0000001
K00616 E2.2.1.2, talA, talB; transaldolase -7.954862 0.0000002
K02033 ABC.PE.P; peptide/nickel transport system permease protein -8.022345 0.0000002
K02884 RP-L19, MRPL19, rplS; large subunit ribosomal protein L19 -8.025169 0.0000121
K22463 afsA; 2-oxo-3-(phosphooxy)propyl 3-oxoalkanoate synthase -8.045847 0.0000022
K00872 thrB1; homoserine kinase -8.098105 0.0000000
K03595 era, ERAL1; GTPase -8.142956 0.0000002
K06855 MAPK4_6; mitogen-activated protein kinase 43986 -8.149037 0.0000042
K02887 RP-L20, MRPL20, rplT; large subunit ribosomal protein L20 -8.154249 0.0000000
K03565 recX; regulatory protein -8.197517 0.0000001
K01939 purA, ADSS; adenylosuccinate synthase -8.204780 0.0000023
K02342 dnaQ; DNA polymerase III subunit epsilon -8.212993 0.0000005
K02770 PTS-Fru-EIIC, fruA; PTS system, fructose-specific IIC component -8.224142 0.0000003
K02769 PTS-Fru-EIIB, fruA; PTS system, fructose-specific IIB component -8.224142 0.0000003
K00936 pdtaS; two-component system, sensor histidine kinase PdtaS -8.330524 0.0000005
K11754 folC; dihydrofolate synthase / folylpolyglutamate synthase -8.341740 0.0000015
K03551 ruvB; holliday junction DNA helicase RuvB -8.360546 0.0000008
K01129 dgt; dGTPase -8.375383 0.0000074
K05364 -8.388278 0.0000025
K11733 lysP; lysine-specific permease -8.446310 0.0000009
K00766 trpD; anthranilate phosphoribosyltransferase -8.488092 0.0000000
K02768 PTS-Fru-EIIA, fruB; PTS system, fructose-specific IIA component -8.537095 0.0000001
K00567 ogt, MGMT; methylated-DNA- -8.583419 0.0026064
K00728 POMT, pmt; dolichyl-phosphate-mannose-protein mannosyltransferase -8.634135 0.0000113
K12019 TRIM43S; tripartite motif-containing protein 43/48/49/64/77 -8.640636 0.0000002
K01673 cynT, can; carbonic anhydrase -8.648944 0.0000010
K01256 pepN; aminopeptidase N -8.653422 0.0000075
K02358 tuf, TUFM; elongation factor Tu -8.791193 0.0058415
K03308 TC.NSS; neurotransmitter:Na+ symporter, NSS family -8.836290 0.0044429
K07442 TRM61, GCD14; tRNA (adenine57-N1/adenine58-N1)-methyltransferase catalytic subunit -8.916610 0.0053757
K04096 smf; DNA processing protein -9.132566 0.0000008
K01923 purC; phosphoribosylaminoimidazole-succinocarboxamide synthase -9.188886 0.0011003
K03536 rnpA; ribonuclease P protein component -9.297534 0.0000043
K03559 exbD; biopolymer transport protein ExbD -9.402267 0.0000000
K00059 fabG, OAR1; 3-oxoacyl- -9.475675 0.0000004
K01754 E4.3.1.19, ilvA, tdcB; threonine dehydratase -9.534632 0.0051595
K00088 IMPDH, guaB; IMP dehydrogenase -9.600835 0.0000000
K03657 uvrD, pcrA; DNA helicase II / ATP-dependent DNA helicase PcrA -9.684566 0.0000000
UNGROUPED -337.103078 0.0070408

Blood vs saliva_euk

Negative fold changes are higher in blood samples while positive are higher in saliva_euk samples.

blbe3 = kos.loc[:, ['Blood_Saliva_euk_FC', 'Blood_Saliva_euk_p']]
blbe3 = blbe3[blbe3['Blood_Saliva_euk_p'] < 0.01]
blbe3 = blbe3.sort_values(by=['Blood_Saliva_euk_FC'], ascending=False).rename(columns={'Blood_Saliva_euk_FC':'Log2 fold change', 'Blood_Saliva_euk_p':'p'})
blbe3 = pd.concat([blbe3.head(n=50), blbe3.tail(n=50)])
blbe3 = blbe3.groupby(by=blbe3.index, axis=0).mean()
blbe3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$blbe3 %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
K02963 RP-S18, MRPS18, rpsR; small subunit ribosomal protein S18 11.341959 0.0000001
K02948 RP-S11, MRPS11, rpsK; small subunit ribosomal protein S11 10.950425 0.0000000
K02518 infA; translation initiation factor IF-1 10.909749 0.0000005
K01992 ABC-2.P; ABC-2 type transport system permease protein 10.780916 0.0000174
K02913 RP-L33, MRPL33, rpmG; large subunit ribosomal protein L33 10.638615 0.0000040
K02902 RP-L28, MRPL28, rpmB; large subunit ribosomal protein L28 10.367308 0.0000000
K02428 rdgB; XTP/dITP diphosphohydrolase 10.320932 0.0000056
K01662 dxs; 1-deoxy-D-xylulose-5-phosphate synthase 10.115384 0.0000006
K01928 murE; UDP-N-acetylmuramoyl-L-alanyl-D-glutamate–2,6-diaminopimelate ligase 10.092058 0.0000040
K02916 RP-L35, MRPL35, rpmI; large subunit ribosomal protein L35 10.055988 0.0000001
K02909 RP-L31, rpmE; large subunit ribosomal protein L31 10.001858 0.0000000
K03551 ruvB; holliday junction DNA helicase RuvB 9.962025 0.0000002
K02988 RP-S5, MRPS5, rpsE; small subunit ribosomal protein S5 9.939578 0.0000007
K01358 clpP, CLPP; ATP-dependent Clp protease, protease subunit 9.901599 0.0000043
K03977 engA, der; GTPase 9.881898 0.0000000
K03629 recF; DNA replication and repair protein RecF 9.843115 0.0000002
K06891 clpS; ATP-dependent Clp protease adaptor protein ClpS 9.816164 0.0000343
K02358 tuf, TUFM; elongation factor Tu 9.807406 0.0000016
K02835 prfA, MTRF1, MRF1; peptide chain release factor 1 9.766626 0.0000000
K01885 EARS, gltX; glutamyl-tRNA synthetase 9.722861 0.0000611
K02899 RP-L27, MRPL27, rpmA; large subunit ribosomal protein L27 9.716704 0.0000002
K01579 panD; aspartate 1-decarboxylase 9.675909 0.0000000
K02967 RP-S2, MRPS2, rpsB; small subunit ribosomal protein S2 9.670229 0.0000001
K00942 E2.7.4.8, gmk; guanylate kinase 9.665516 0.0000101
K02337 dnaE; DNA polymerase III subunit alpha 9.637137 0.0000000
K02108 ATPF0A, atpB; F-type H+-transporting ATPase subunit a 9.604602 0.0000009
K03073 secE; preprotein translocase subunit SecE 9.598304 0.0000013
K03470 rnhB; ribonuclease HII 9.587183 0.0000142
K03711 fur, zur, furB; Fur family transcriptional regulator, ferric uptake regulator 9.573168 0.0000004
K01495 GCH1, folE; GTP cyclohydrolase IA 9.543082 0.0000413
K03070 secA; preprotein translocase subunit SecA 9.535647 0.0000022
K02867 RP-L11, MRPL11, rplK; large subunit ribosomal protein L11 9.450057 0.0000021
K03811 pnuC; nicotinamide mononucleotide transporter 9.449862 0.0000002
K03664 smpB; SsrA-binding protein 9.441698 0.0000000
K02563 murG; UDP-N-acetylglucosamine–N-acetylmuramyl-(pentapeptide) pyrophosphoryl-undecaprenol N-acetylglucosamine transferase 9.308290 0.0000006
K00773 tgt, QTRT1; queuine tRNA-ribosyltransferase 9.279428 0.0000012
K02906 RP-L3, MRPL3, rplC; large subunit ribosomal protein L3 9.243950 0.0000001
K02914 RP-L34, MRPL34, rpmH; large subunit ribosomal protein L34 9.232331 0.0000029
K12952 ctpE; cation-transporting P-type ATPase E 9.229432 0.0000010
K00075 murB; UDP-N-acetylmuramate dehydrogenase 9.228715 0.0000005
K02338 dnaN; DNA polymerase III subunit beta 9.198856 0.0000054
K02874 RP-L14, MRPL14, rplN; large subunit ribosomal protein L14 9.184765 0.0000000
K00963 UGP2, galU, galF; UTP–glucose-1-phosphate uridylyltransferase 9.177459 0.0000001
K01091 gph; phosphoglycolate phosphatase 9.109656 0.0000004
K03624 greA; transcription elongation factor GreA 9.104214 0.0000001
K02601 nusG; transcription termination/antitermination protein NusG 9.102247 0.0000004
K19710 E2.7.7.53; ATP adenylyltransferase 9.095833 0.0000027
K00765 hisG; ATP phosphoribosyltransferase 9.092277 0.0000000
K02115 ATPF1G, atpG; F-type H+-transporting ATPase subunit gamma 9.081642 0.0000002
K03116 tatA; sec-independent protein translocase protein TatA 9.072573 0.0000218
K01704 leuD, IPMI-S; 3-isopropylmalate/(R)-2-methylmalate dehydratase small subunit 8.410856 0.0001828
K00943 tmk, DTYMK; dTMP kinase 8.381361 0.0000023
K18893 vcaM; ATP-binding cassette, subfamily B, multidrug efflux pump 8.358762 0.0000011
K08396 MRGPRX; Mas-related G protein-coupled receptor member X 8.357014 0.0000000
K13643 iscR; Rrf2 family transcriptional regulator, iron-sulfur cluster assembly transcription factor 8.341677 0.0000161
K01883 CARS, cysS; cysteinyl-tRNA synthetase 8.314948 0.0000003
K02535 lpxC; UDP-3-O- 8.314917 0.0000095
K01255 CARP, pepA; leucyl aminopeptidase 8.305561 0.0000000
K09903 pyrH; uridylate kinase 8.299529 0.0000363
K00610 pyrI; aspartate carbamoyltransferase regulatory subunit 8.284502 0.0000060
K01687 ilvD; dihydroxy-acid dehydratase 8.248209 0.0000257
K13678 cpoA; 1,2-diacylglycerol-3-alpha-glucose alpha-1,2-galactosyltransferase 8.207438 0.0000012
K01657 trpE; anthranilate synthase component I 8.184915 0.0000137
K05863 SLC25A4S, ANT; solute carrier family 25 (mitochondrial adenine nucleotide translocator), member 4/5/6/31 8.184273 0.0000001
K02956 RP-S15, MRPS15, rpsO; small subunit ribosomal protein S15 8.168320 0.0000108
K01915 glnA, GLUL; glutamine synthetase 8.163729 0.0000003
K07224 efeO; iron uptake system component EfeO 8.116363 0.0000387
K03530 hupB; DNA-binding protein HU-beta 8.060273 0.0000157
K02823 pyrDII; dihydroorotate dehydrogenase electron transfer subunit 8.023014 0.0000302
K03594 bfr; bacterioferritin 8.011747 0.0002121
K01924 murC; UDP-N-acetylmuramate–alanine ligase 8.010583 0.0000004
K01874 MARS, metG; methionyl-tRNA synthetase 8.004532 0.0000001
K00945 cmk; CMP/dCMP kinase 7.994847 0.0000011
K00341 nuoL; NADH-quinone oxidoreductase subunit L 7.974158 0.0000295
K00948 PRPS, prsA; ribose-phosphate pyrophosphokinase 7.958184 0.0000053
K09529 DNAJC9; DnaJ homolog subfamily C member 9 7.946732 0.0000002
K03484 scrR; LacI family transcriptional regulator, sucrose operon repressor 7.921266 0.0000126
K00260 gudB, rocG; glutamate dehydrogenase 7.905971 0.0000493
K01595 ppc; phosphoenolpyruvate carboxylase 7.900376 0.0000003
K02662 pilM; type IV pilus assembly protein PilM 7.851796 0.0000046
K00760 hprT, hpt, HPRT1; hypoxanthine phosphoribosyltransferase 7.827778 0.0000024
K09560 ST13; suppressor of tumorigenicity protein 13 7.700560 0.0000007
K00376 nosZ; nitrous-oxide reductase 7.670794 0.0000014
K02357 tsf, TSFM; elongation factor Ts 7.634399 0.0000053
K19157 yafQ; mRNA interferase YafQ 7.622868 0.0000014
K03602 xseB; exodeoxyribonuclease VII small subunit 7.591479 0.0000044
K16197 YWHAB_Q_Z; 37694 protein beta/theta/zeta 7.279191 0.0000000
K10580 UBE2N, BLU, UBC13; ubiquitin-conjugating enzyme E2 N 7.204063 0.0000023
K20651 ARHGAP42, GRAF3; Rho GTPase-activating protein 42 7.204023 0.0000153
K03257 EIF4A; translation initiation factor 4A 7.166795 0.0000018
K08363 merT; mercuric ion transport protein 6.963124 0.0002421
K19879 TCAP; telethonin 6.621062 0.0000314
K00236 SDHC, SDH3; succinate dehydrogenase (ubiquinone) cytochrome b560 subunit 6.441310 0.0000309
K11097 SNRPE, SME; small nuclear ribonucleoprotein E 6.299610 0.0000624
K02975 RP-S25e, RPS25; small subunit ribosomal protein S25e 5.876647 0.0000039
K04393 CDC42; cell division control protein 42 5.856245 0.0000933
K04552 UBE2L3, UBCH7; ubiquitin-conjugating enzyme E2 L3 5.207358 0.0002249
K02256 COX1; cytochrome c oxidase subunit 1 -1.171254 0.0058121
K03884 ND6; NADH-ubiquinone oxidoreductase chain 6 -1.745828 0.0017884
K03881 ND4; NADH-ubiquinone oxidoreductase chain 4 -1.892856 0.0068414

Blood vs buccal_euk

Negative fold changes are higher in blood samples while positive are higher in buccal_euk samples.

blse3 = kos.loc[:, ['Blood_Buccal_euk_FC', 'Blood_Buccal_euk_p']]
blse3 = blse3[blse3['Blood_Buccal_euk_p'] < 0.01]
blse3 = blse3.sort_values(by=['Blood_Buccal_euk_FC'], ascending=False).rename(columns={'Blood_Buccal_euk_FC':'Log2 fold change', 'Blood_Buccal_euk_p':'p'})
blse3 = pd.concat([blse3.head(n=50), blse.tail(n=50)])
blse3 = blse3.groupby(by=blse3.index, axis=0).mean()
blse3.sort_values(by=['Log2 fold change'], ascending=False, inplace=True)
py$blse3 %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "600px", height = "400px")
Log2 fold change p
K05863 SLC25A4S, ANT; solute carrier family 25 (mitochondrial adenine nucleotide translocator), member 4/5/6/31 9.3155876 0.0000046
K02934 RP-L6e, RPL6; large subunit ribosomal protein L6e 9.1316569 0.0000003
K19468 KCNJ18; potassium inwardly-rectifying channel subfamily J member 18 8.1018621 0.0000096
K03257 EIF4A; translation initiation factor 4A 7.9018195 0.0000035
K03062 PSMC1, RPT2; 26S proteasome regulatory subunit T2 7.8222486 0.0000030
K11586 CBX3, HP1G; chromobox protein 3 7.5044275 0.0000008
K03284 corA; magnesium transporter 7.1522003 0.0001015
K01661 menB; naphthoate synthase 7.1493059 0.0000647
K01647 CS, gltA; citrate synthase 7.1195886 0.0000067
K18928 lldE; L-lactate dehydrogenase complex protein LldE 7.0983073 0.0000061
K12757 MYL12; myosin regulatory light chain 12 7.0955579 0.0000007
K05840 DRD5; dopamine receptor D5 7.0636557 0.0000000
K08857 NEK1_4_5; NIMA (never in mitosis gene a)-related kinase 38443 7.0589315 0.0000035
K03466 ftsK, spoIIIE; DNA segregation ATPase FtsK/SpoIIIE, S-DNA-T family 7.0565895 0.0008980
K03960 NDUFB4; NADH dehydrogenase (ubiquinone) 1 beta subcomplex subunit 4 7.0253055 0.0000000
K03639 moaA, CNX2; GTP 3’,8-cyclase 7.0054202 0.0000799
K00782 lldG; L-lactate dehydrogenase complex protein LldG 7.0019641 0.0000004
K14455 GOT2; aspartate aminotransferase, mitochondrial 6.9945243 0.0000001
K03629 recF; DNA replication and repair protein RecF 6.9883502 0.0000612
K00653 CDY; chromodomain protein Y 6.9102327 0.0000007
K05786 rarD; chloramphenicol-sensitive protein RarD 6.9046115 0.0000255
K00371 narH, narY, nxrB; nitrate reductase / nitrite oxidoreductase, beta subunit 6.8665611 0.0001685
K03547 sbcD, mre11; DNA repair protein SbcD/Mre11 6.8586688 0.0000767
K02342 dnaQ; DNA polymerase III subunit epsilon 6.7129347 0.0000956
K00525 E1.17.4.1A, nrdA, nrdE; ribonucleoside-diphosphate reductase alpha chain 6.7013656 0.0000661
K17455 FSCN2; fascin2 6.6408974 0.0000088
K00240 sdhB, frdB; succinate dehydrogenase / fumarate reductase, iron-sulfur subunit 6.6270061 0.0013377
K10343 SPSB1_4, SSB1, SSB4; SPRY domain-containing SOCS box protein 43922 6.5750024 0.0000001
K03723 mfd; transcription-repair coupling factor (superfamily II helicase) 6.5721780 0.0000566
K01778 dapF; diaminopimelate epimerase 6.5471812 0.0000360
K01207 nagZ; beta-N-acetylhexosaminidase 6.4067020 0.0000378
K01652 E2.2.1.6L, ilvB, ilvG, ilvI; acetolactate synthase I/II/III large subunit 6.3920399 0.0007322
K06911 K06911; uncharacterized protein 6.3133922 0.0000030
K02858 ribB, RIB3; 3,4-dihydroxy 2-butanone 4-phosphate synthase 6.3106401 0.0003793
K03527 ispH, lytB; 4-hydroxy-3-methylbut-2-en-1-yl diphosphate reductase 6.1477167 0.0002274
K01911 menE; O-succinylbenzoic acid—CoA ligase 6.1373608 0.0000285
K01744 aspA; aspartate ammonia-lyase 6.0818258 0.0002584
K00099 dxr; 1-deoxy-D-xylulose-5-phosphate reductoisomerase 5.9628220 0.0004205
K04875 KCNA2, KV1.2; potassium voltage-gated channel Shaker-related subfamily A member 2 5.9414027 0.0000049
K01610 E4.1.1.49, pckA; phosphoenolpyruvate carboxykinase (ATP) 5.9186660 0.0000373
K12755 MYL9; myosin regulatory light chain 9 5.8986788 0.0000010
K03319 TC.DASS; divalent anion:Na+ symporter, DASS family 5.8744409 0.0000259
K10752 RBBP4, HAT2, CAF1, MIS16; histone-binding protein RBBP4 5.7928033 0.0000036
K02517 lpxL, htrB; Kdo2-lipid IVA lauroyltransferase/acyltransferase 5.7843811 0.0001123
K17392 IGF2BP2; insulin-like growth factor 2 mRNA-binding protein 2 5.7651144 0.0000040
K00831 serC, PSAT1; phosphoserine aminotransferase 5.7394312 0.0000028
K02930 RP-L4e, RPL4; large subunit ribosomal protein L4e 5.6662498 0.0000003
K08301 rng, cafA; ribonuclease G 5.6512400 0.0000924
K06281 hyaB, hybC; hydrogenase large subunit 5.4295802 0.0001948
K04874 KCNA1, KV1.1; potassium voltage-gated channel Shaker-related subfamily A member 1 5.4132320 0.0000080
K03546 sbcC, rad50; DNA repair protein SbcC/Rad50 5.2338079 0.0017231
K16092 btuB; vitamin B12 transporter 4.7102953 0.0017984
K16495 PCDHGA; protocadherin gamma subfamily A 4.5153796 0.0000049
K00134 GAPDH, gapA; glyceraldehyde 3-phosphate dehydrogenase 1.7257634 0.0057157
K06478 PTPRC, CD45; receptor-type tyrosine-protein phosphatase C -0.3261990 0.0020246
K00522 FTH1; ferritin heavy chain -0.9651333 0.0070830
K03881 ND4; NADH-ubiquinone oxidoreductase chain 4 -1.3794228 0.0062609